diff options
author | J. Bruce Fields <bfields@redhat.com> | 2014-11-19 12:06:30 -0500 |
---|---|---|
committer | J. Bruce Fields <bfields@redhat.com> | 2014-11-19 12:06:30 -0500 |
commit | 56429e9b3be567a173bd05f5594faf8522c34d3a (patch) | |
tree | d218d430ed992cdfa42da084bf36e5aa3c2ecb26 /fs | |
parent | 5b095e99928cc13332d364f7cca7a9ca684369b4 (diff) | |
parent | 093a1468b6edb0e568be7311b8d2228d205702db (diff) |
merge nfs bugfixes into nfsd for-3.19 branch
In addition to nfsd bugfixes, there are some fixes in -rc5 for client
bugs that can interfere with my testing.
Diffstat (limited to 'fs')
100 files changed, 5291 insertions, 2392 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index db5dc1598716..664991afe0c0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -67,6 +67,7 @@ source "fs/quota/Kconfig" | |||
67 | 67 | ||
68 | source "fs/autofs4/Kconfig" | 68 | source "fs/autofs4/Kconfig" |
69 | source "fs/fuse/Kconfig" | 69 | source "fs/fuse/Kconfig" |
70 | source "fs/overlayfs/Kconfig" | ||
70 | 71 | ||
71 | menu "Caches" | 72 | menu "Caches" |
72 | 73 | ||
diff --git a/fs/Makefile b/fs/Makefile index 90c88529892b..34a1b9dea6dd 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ | |||
104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ | 104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ |
105 | obj-$(CONFIG_ADFS_FS) += adfs/ | 105 | obj-$(CONFIG_ADFS_FS) += adfs/ |
106 | obj-$(CONFIG_FUSE_FS) += fuse/ | 106 | obj-$(CONFIG_FUSE_FS) += fuse/ |
107 | obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ | ||
107 | obj-$(CONFIG_UDF_FS) += udf/ | 108 | obj-$(CONFIG_UDF_FS) += udf/ |
108 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ | 109 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ |
109 | obj-$(CONFIG_OMFS_FS) += omfs/ | 110 | obj-$(CONFIG_OMFS_FS) += omfs/ |
diff --git a/fs/block_dev.c b/fs/block_dev.c index cc9d4114cda0..1d9c9f3754f8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1585,7 +1585,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
1585 | } | 1585 | } |
1586 | EXPORT_SYMBOL_GPL(blkdev_write_iter); | 1586 | EXPORT_SYMBOL_GPL(blkdev_write_iter); |
1587 | 1587 | ||
1588 | static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) | 1588 | ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) |
1589 | { | 1589 | { |
1590 | struct file *file = iocb->ki_filp; | 1590 | struct file *file = iocb->ki_filp; |
1591 | struct inode *bd_inode = file->f_mapping->host; | 1591 | struct inode *bd_inode = file->f_mapping->host; |
@@ -1599,6 +1599,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) | |||
1599 | iov_iter_truncate(to, size); | 1599 | iov_iter_truncate(to, size); |
1600 | return generic_file_read_iter(iocb, to); | 1600 | return generic_file_read_iter(iocb, to); |
1601 | } | 1601 | } |
1602 | EXPORT_SYMBOL_GPL(blkdev_read_iter); | ||
1602 | 1603 | ||
1603 | /* | 1604 | /* |
1604 | * Try to release a page associated with block device when the system | 1605 | * Try to release a page associated with block device when the system |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d557264ee974..fe69edda11fb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -3276,7 +3276,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
3276 | struct btrfs_root *root, unsigned long count); | 3276 | struct btrfs_root *root, unsigned long count); |
3277 | int btrfs_async_run_delayed_refs(struct btrfs_root *root, | 3277 | int btrfs_async_run_delayed_refs(struct btrfs_root *root, |
3278 | unsigned long count, int wait); | 3278 | unsigned long count, int wait); |
3279 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 3279 | int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); |
3280 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | 3280 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, |
3281 | struct btrfs_root *root, u64 bytenr, | 3281 | struct btrfs_root *root, u64 bytenr, |
3282 | u64 offset, int metadata, u64 *refs, u64 *flags); | 3282 | u64 offset, int metadata, u64 *refs, u64 *flags); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ad0f47ac850..1bf9f897065d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -3817,19 +3817,19 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3817 | struct btrfs_super_block *sb = fs_info->super_copy; | 3817 | struct btrfs_super_block *sb = fs_info->super_copy; |
3818 | int ret = 0; | 3818 | int ret = 0; |
3819 | 3819 | ||
3820 | if (sb->root_level > BTRFS_MAX_LEVEL) { | 3820 | if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { |
3821 | printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", | 3821 | printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n", |
3822 | sb->root_level, BTRFS_MAX_LEVEL); | 3822 | btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); |
3823 | ret = -EINVAL; | 3823 | ret = -EINVAL; |
3824 | } | 3824 | } |
3825 | if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { | 3825 | if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { |
3826 | printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", | 3826 | printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n", |
3827 | sb->chunk_root_level, BTRFS_MAX_LEVEL); | 3827 | btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); |
3828 | ret = -EINVAL; | 3828 | ret = -EINVAL; |
3829 | } | 3829 | } |
3830 | if (sb->log_root_level > BTRFS_MAX_LEVEL) { | 3830 | if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { |
3831 | printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", | 3831 | printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n", |
3832 | sb->log_root_level, BTRFS_MAX_LEVEL); | 3832 | btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); |
3833 | ret = -EINVAL; | 3833 | ret = -EINVAL; |
3834 | } | 3834 | } |
3835 | 3835 | ||
@@ -3837,15 +3837,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3837 | * The common minimum, we don't know if we can trust the nodesize/sectorsize | 3837 | * The common minimum, we don't know if we can trust the nodesize/sectorsize |
3838 | * items yet, they'll be verified later. Issue just a warning. | 3838 | * items yet, they'll be verified later. Issue just a warning. |
3839 | */ | 3839 | */ |
3840 | if (!IS_ALIGNED(sb->root, 4096)) | 3840 | if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) |
3841 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3841 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", |
3842 | sb->root); | 3842 | sb->root); |
3843 | if (!IS_ALIGNED(sb->chunk_root, 4096)) | 3843 | if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) |
3844 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3844 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", |
3845 | sb->chunk_root); | 3845 | sb->chunk_root); |
3846 | if (!IS_ALIGNED(sb->log_root, 4096)) | 3846 | if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) |
3847 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3847 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", |
3848 | sb->log_root); | 3848 | btrfs_super_log_root(sb)); |
3849 | 3849 | ||
3850 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { | 3850 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { |
3851 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", | 3851 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", |
@@ -3857,13 +3857,13 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3857 | * Hint to catch really bogus numbers, bitflips or so, more exact checks are | 3857 | * Hint to catch really bogus numbers, bitflips or so, more exact checks are |
3858 | * done later | 3858 | * done later |
3859 | */ | 3859 | */ |
3860 | if (sb->num_devices > (1UL << 31)) | 3860 | if (btrfs_super_num_devices(sb) > (1UL << 31)) |
3861 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", | 3861 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", |
3862 | sb->num_devices); | 3862 | btrfs_super_num_devices(sb)); |
3863 | 3863 | ||
3864 | if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { | 3864 | if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { |
3865 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", | 3865 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", |
3866 | sb->bytenr, BTRFS_SUPER_INFO_OFFSET); | 3866 | btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); |
3867 | ret = -EINVAL; | 3867 | ret = -EINVAL; |
3868 | } | 3868 | } |
3869 | 3869 | ||
@@ -3871,14 +3871,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
3871 | * The generation is a global counter, we'll trust it more than the others | 3871 | * The generation is a global counter, we'll trust it more than the others |
3872 | * but it's still possible that it's the one that's wrong. | 3872 | * but it's still possible that it's the one that's wrong. |
3873 | */ | 3873 | */ |
3874 | if (sb->generation < sb->chunk_root_generation) | 3874 | if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) |
3875 | printk(KERN_WARNING | 3875 | printk(KERN_WARNING |
3876 | "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", | 3876 | "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", |
3877 | sb->generation, sb->chunk_root_generation); | 3877 | btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); |
3878 | if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) | 3878 | if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) |
3879 | && btrfs_super_cache_generation(sb) != (u64)-1) | ||
3879 | printk(KERN_WARNING | 3880 | printk(KERN_WARNING |
3880 | "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", | 3881 | "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", |
3881 | sb->generation, sb->cache_generation); | 3882 | btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); |
3882 | 3883 | ||
3883 | return ret; | 3884 | return ret; |
3884 | } | 3885 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d56589571012..47c1ba141082 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -710,8 +710,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | |||
710 | rcu_read_unlock(); | 710 | rcu_read_unlock(); |
711 | } | 711 | } |
712 | 712 | ||
713 | /* simple helper to search for an existing extent at a given offset */ | 713 | /* simple helper to search for an existing data extent at a given offset */ |
714 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | 714 | int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len) |
715 | { | 715 | { |
716 | int ret; | 716 | int ret; |
717 | struct btrfs_key key; | 717 | struct btrfs_key key; |
@@ -726,12 +726,6 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
726 | key.type = BTRFS_EXTENT_ITEM_KEY; | 726 | key.type = BTRFS_EXTENT_ITEM_KEY; |
727 | ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, | 727 | ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, |
728 | 0, 0); | 728 | 0, 0); |
729 | if (ret > 0) { | ||
730 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
731 | if (key.objectid == start && | ||
732 | key.type == BTRFS_METADATA_ITEM_KEY) | ||
733 | ret = 0; | ||
734 | } | ||
735 | btrfs_free_path(path); | 729 | btrfs_free_path(path); |
736 | return ret; | 730 | return ret; |
737 | } | 731 | } |
@@ -786,7 +780,6 @@ search_again: | |||
786 | else | 780 | else |
787 | key.type = BTRFS_EXTENT_ITEM_KEY; | 781 | key.type = BTRFS_EXTENT_ITEM_KEY; |
788 | 782 | ||
789 | again: | ||
790 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | 783 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, |
791 | &key, path, 0, 0); | 784 | &key, path, 0, 0); |
792 | if (ret < 0) | 785 | if (ret < 0) |
@@ -802,13 +795,6 @@ again: | |||
802 | key.offset == root->nodesize) | 795 | key.offset == root->nodesize) |
803 | ret = 0; | 796 | ret = 0; |
804 | } | 797 | } |
805 | if (ret) { | ||
806 | key.objectid = bytenr; | ||
807 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
808 | key.offset = root->nodesize; | ||
809 | btrfs_release_path(path); | ||
810 | goto again; | ||
811 | } | ||
812 | } | 798 | } |
813 | 799 | ||
814 | if (ret == 0) { | 800 | if (ret == 0) { |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 783a94355efd..84a2d1868271 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
413 | ret = 0; | 413 | ret = 0; |
414 | fail: | 414 | fail: |
415 | while (ret < 0 && !list_empty(&tmplist)) { | 415 | while (ret < 0 && !list_empty(&tmplist)) { |
416 | sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); | 416 | sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); |
417 | list_del(&sums->list); | 417 | list_del(&sums->list); |
418 | kfree(sums); | 418 | kfree(sums); |
419 | } | 419 | } |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d2b76e29d3b..4399f0c3a4ce 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -765,23 +765,6 @@ out: | |||
765 | return ret; | 765 | return ret; |
766 | } | 766 | } |
767 | 767 | ||
768 | /* copy of check_sticky in fs/namei.c() | ||
769 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
770 | * minimal. | ||
771 | */ | ||
772 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | ||
773 | { | ||
774 | kuid_t fsuid = current_fsuid(); | ||
775 | |||
776 | if (!(dir->i_mode & S_ISVTX)) | ||
777 | return 0; | ||
778 | if (uid_eq(inode->i_uid, fsuid)) | ||
779 | return 0; | ||
780 | if (uid_eq(dir->i_uid, fsuid)) | ||
781 | return 0; | ||
782 | return !capable(CAP_FOWNER); | ||
783 | } | ||
784 | |||
785 | /* copy of may_delete in fs/namei.c() | 768 | /* copy of may_delete in fs/namei.c() |
786 | * Check whether we can remove a link victim from directory dir, check | 769 | * Check whether we can remove a link victim from directory dir, check |
787 | * whether the type of victim is right. | 770 | * whether the type of victim is right. |
@@ -817,8 +800,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) | |||
817 | return error; | 800 | return error; |
818 | if (IS_APPEND(dir)) | 801 | if (IS_APPEND(dir)) |
819 | return -EPERM; | 802 | return -EPERM; |
820 | if (btrfs_check_sticky(dir, victim->d_inode)|| | 803 | if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) || |
821 | IS_APPEND(victim->d_inode)|| | ||
822 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) | 804 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) |
823 | return -EPERM; | 805 | return -EPERM; |
824 | if (isdir) { | 806 | if (isdir) { |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a2b97ef10317..54bd91ece35b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -2151,6 +2151,7 @@ static void __exit exit_btrfs_fs(void) | |||
2151 | extent_map_exit(); | 2151 | extent_map_exit(); |
2152 | extent_io_exit(); | 2152 | extent_io_exit(); |
2153 | btrfs_interface_exit(); | 2153 | btrfs_interface_exit(); |
2154 | btrfs_end_io_wq_exit(); | ||
2154 | unregister_filesystem(&btrfs_fs_type); | 2155 | unregister_filesystem(&btrfs_fs_type); |
2155 | btrfs_exit_sysfs(); | 2156 | btrfs_exit_sysfs(); |
2156 | btrfs_cleanup_fs_uuids(); | 2157 | btrfs_cleanup_fs_uuids(); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1475979e5718..286213cec861 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -672,7 +672,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
672 | * is this extent already allocated in the extent | 672 | * is this extent already allocated in the extent |
673 | * allocation tree? If so, just add a reference | 673 | * allocation tree? If so, just add a reference |
674 | */ | 674 | */ |
675 | ret = btrfs_lookup_extent(root, ins.objectid, | 675 | ret = btrfs_lookup_data_extent(root, ins.objectid, |
676 | ins.offset); | 676 | ins.offset); |
677 | if (ret == 0) { | 677 | if (ret == 0) { |
678 | ret = btrfs_inc_extent_ref(trans, root, | 678 | ret = btrfs_inc_extent_ref(trans, root, |
diff --git a/fs/buffer.c b/fs/buffer.c index 9614adc7e754..20805db2c987 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -128,21 +128,15 @@ __clear_page_buffers(struct page *page) | |||
128 | page_cache_release(page); | 128 | page_cache_release(page); |
129 | } | 129 | } |
130 | 130 | ||
131 | 131 | static void buffer_io_error(struct buffer_head *bh, char *msg) | |
132 | static int quiet_error(struct buffer_head *bh) | ||
133 | { | ||
134 | if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit()) | ||
135 | return 0; | ||
136 | return 1; | ||
137 | } | ||
138 | |||
139 | |||
140 | static void buffer_io_error(struct buffer_head *bh) | ||
141 | { | 132 | { |
142 | char b[BDEVNAME_SIZE]; | 133 | char b[BDEVNAME_SIZE]; |
143 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", | 134 | |
135 | if (!test_bit(BH_Quiet, &bh->b_state)) | ||
136 | printk_ratelimited(KERN_ERR | ||
137 | "Buffer I/O error on dev %s, logical block %llu%s\n", | ||
144 | bdevname(bh->b_bdev, b), | 138 | bdevname(bh->b_bdev, b), |
145 | (unsigned long long)bh->b_blocknr); | 139 | (unsigned long long)bh->b_blocknr, msg); |
146 | } | 140 | } |
147 | 141 | ||
148 | /* | 142 | /* |
@@ -177,17 +171,10 @@ EXPORT_SYMBOL(end_buffer_read_sync); | |||
177 | 171 | ||
178 | void end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 172 | void end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
179 | { | 173 | { |
180 | char b[BDEVNAME_SIZE]; | ||
181 | |||
182 | if (uptodate) { | 174 | if (uptodate) { |
183 | set_buffer_uptodate(bh); | 175 | set_buffer_uptodate(bh); |
184 | } else { | 176 | } else { |
185 | if (!quiet_error(bh)) { | 177 | buffer_io_error(bh, ", lost sync page write"); |
186 | buffer_io_error(bh); | ||
187 | printk(KERN_WARNING "lost page write due to " | ||
188 | "I/O error on %s\n", | ||
189 | bdevname(bh->b_bdev, b)); | ||
190 | } | ||
191 | set_buffer_write_io_error(bh); | 178 | set_buffer_write_io_error(bh); |
192 | clear_buffer_uptodate(bh); | 179 | clear_buffer_uptodate(bh); |
193 | } | 180 | } |
@@ -304,8 +291,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
304 | set_buffer_uptodate(bh); | 291 | set_buffer_uptodate(bh); |
305 | } else { | 292 | } else { |
306 | clear_buffer_uptodate(bh); | 293 | clear_buffer_uptodate(bh); |
307 | if (!quiet_error(bh)) | 294 | buffer_io_error(bh, ", async page read"); |
308 | buffer_io_error(bh); | ||
309 | SetPageError(page); | 295 | SetPageError(page); |
310 | } | 296 | } |
311 | 297 | ||
@@ -353,7 +339,6 @@ still_busy: | |||
353 | */ | 339 | */ |
354 | void end_buffer_async_write(struct buffer_head *bh, int uptodate) | 340 | void end_buffer_async_write(struct buffer_head *bh, int uptodate) |
355 | { | 341 | { |
356 | char b[BDEVNAME_SIZE]; | ||
357 | unsigned long flags; | 342 | unsigned long flags; |
358 | struct buffer_head *first; | 343 | struct buffer_head *first; |
359 | struct buffer_head *tmp; | 344 | struct buffer_head *tmp; |
@@ -365,12 +350,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |||
365 | if (uptodate) { | 350 | if (uptodate) { |
366 | set_buffer_uptodate(bh); | 351 | set_buffer_uptodate(bh); |
367 | } else { | 352 | } else { |
368 | if (!quiet_error(bh)) { | 353 | buffer_io_error(bh, ", lost async page write"); |
369 | buffer_io_error(bh); | ||
370 | printk(KERN_WARNING "lost page write due to " | ||
371 | "I/O error on %s\n", | ||
372 | bdevname(bh->b_bdev, b)); | ||
373 | } | ||
374 | set_bit(AS_EIO, &page->mapping->flags); | 354 | set_bit(AS_EIO, &page->mapping->flags); |
375 | set_buffer_write_io_error(bh); | 355 | set_buffer_write_io_error(bh); |
376 | clear_buffer_uptodate(bh); | 356 | clear_buffer_uptodate(bh); |
@@ -993,7 +973,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, | |||
993 | */ | 973 | */ |
994 | static int | 974 | static int |
995 | grow_dev_page(struct block_device *bdev, sector_t block, | 975 | grow_dev_page(struct block_device *bdev, sector_t block, |
996 | pgoff_t index, int size, int sizebits) | 976 | pgoff_t index, int size, int sizebits, gfp_t gfp) |
997 | { | 977 | { |
998 | struct inode *inode = bdev->bd_inode; | 978 | struct inode *inode = bdev->bd_inode; |
999 | struct page *page; | 979 | struct page *page; |
@@ -1002,8 +982,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
1002 | int ret = 0; /* Will call free_more_memory() */ | 982 | int ret = 0; /* Will call free_more_memory() */ |
1003 | gfp_t gfp_mask; | 983 | gfp_t gfp_mask; |
1004 | 984 | ||
1005 | gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; | 985 | gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; |
1006 | gfp_mask |= __GFP_MOVABLE; | 986 | |
1007 | /* | 987 | /* |
1008 | * XXX: __getblk_slow() can not really deal with failure and | 988 | * XXX: __getblk_slow() can not really deal with failure and |
1009 | * will endlessly loop on improvised global reclaim. Prefer | 989 | * will endlessly loop on improvised global reclaim. Prefer |
@@ -1060,7 +1040,7 @@ failed: | |||
1060 | * that page was dirty, the buffers are set dirty also. | 1040 | * that page was dirty, the buffers are set dirty also. |
1061 | */ | 1041 | */ |
1062 | static int | 1042 | static int |
1063 | grow_buffers(struct block_device *bdev, sector_t block, int size) | 1043 | grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) |
1064 | { | 1044 | { |
1065 | pgoff_t index; | 1045 | pgoff_t index; |
1066 | int sizebits; | 1046 | int sizebits; |
@@ -1087,11 +1067,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) | |||
1087 | } | 1067 | } |
1088 | 1068 | ||
1089 | /* Create a page with the proper size buffers.. */ | 1069 | /* Create a page with the proper size buffers.. */ |
1090 | return grow_dev_page(bdev, block, index, size, sizebits); | 1070 | return grow_dev_page(bdev, block, index, size, sizebits, gfp); |
1091 | } | 1071 | } |
1092 | 1072 | ||
1093 | static struct buffer_head * | 1073 | struct buffer_head * |
1094 | __getblk_slow(struct block_device *bdev, sector_t block, int size) | 1074 | __getblk_slow(struct block_device *bdev, sector_t block, |
1075 | unsigned size, gfp_t gfp) | ||
1095 | { | 1076 | { |
1096 | /* Size must be multiple of hard sectorsize */ | 1077 | /* Size must be multiple of hard sectorsize */ |
1097 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || | 1078 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || |
@@ -1113,13 +1094,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) | |||
1113 | if (bh) | 1094 | if (bh) |
1114 | return bh; | 1095 | return bh; |
1115 | 1096 | ||
1116 | ret = grow_buffers(bdev, block, size); | 1097 | ret = grow_buffers(bdev, block, size, gfp); |
1117 | if (ret < 0) | 1098 | if (ret < 0) |
1118 | return NULL; | 1099 | return NULL; |
1119 | if (ret == 0) | 1100 | if (ret == 0) |
1120 | free_more_memory(); | 1101 | free_more_memory(); |
1121 | } | 1102 | } |
1122 | } | 1103 | } |
1104 | EXPORT_SYMBOL(__getblk_slow); | ||
1123 | 1105 | ||
1124 | /* | 1106 | /* |
1125 | * The relationship between dirty buffers and dirty pages: | 1107 | * The relationship between dirty buffers and dirty pages: |
@@ -1373,24 +1355,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) | |||
1373 | EXPORT_SYMBOL(__find_get_block); | 1355 | EXPORT_SYMBOL(__find_get_block); |
1374 | 1356 | ||
1375 | /* | 1357 | /* |
1376 | * __getblk will locate (and, if necessary, create) the buffer_head | 1358 | * __getblk_gfp() will locate (and, if necessary, create) the buffer_head |
1377 | * which corresponds to the passed block_device, block and size. The | 1359 | * which corresponds to the passed block_device, block and size. The |
1378 | * returned buffer has its reference count incremented. | 1360 | * returned buffer has its reference count incremented. |
1379 | * | 1361 | * |
1380 | * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() | 1362 | * __getblk_gfp() will lock up the machine if grow_dev_page's |
1381 | * attempt is failing. FIXME, perhaps? | 1363 | * try_to_free_buffers() attempt is failing. FIXME, perhaps? |
1382 | */ | 1364 | */ |
1383 | struct buffer_head * | 1365 | struct buffer_head * |
1384 | __getblk(struct block_device *bdev, sector_t block, unsigned size) | 1366 | __getblk_gfp(struct block_device *bdev, sector_t block, |
1367 | unsigned size, gfp_t gfp) | ||
1385 | { | 1368 | { |
1386 | struct buffer_head *bh = __find_get_block(bdev, block, size); | 1369 | struct buffer_head *bh = __find_get_block(bdev, block, size); |
1387 | 1370 | ||
1388 | might_sleep(); | 1371 | might_sleep(); |
1389 | if (bh == NULL) | 1372 | if (bh == NULL) |
1390 | bh = __getblk_slow(bdev, block, size); | 1373 | bh = __getblk_slow(bdev, block, size, gfp); |
1391 | return bh; | 1374 | return bh; |
1392 | } | 1375 | } |
1393 | EXPORT_SYMBOL(__getblk); | 1376 | EXPORT_SYMBOL(__getblk_gfp); |
1394 | 1377 | ||
1395 | /* | 1378 | /* |
1396 | * Do async read-ahead on a buffer.. | 1379 | * Do async read-ahead on a buffer.. |
@@ -1406,24 +1389,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) | |||
1406 | EXPORT_SYMBOL(__breadahead); | 1389 | EXPORT_SYMBOL(__breadahead); |
1407 | 1390 | ||
1408 | /** | 1391 | /** |
1409 | * __bread() - reads a specified block and returns the bh | 1392 | * __bread_gfp() - reads a specified block and returns the bh |
1410 | * @bdev: the block_device to read from | 1393 | * @bdev: the block_device to read from |
1411 | * @block: number of block | 1394 | * @block: number of block |
1412 | * @size: size (in bytes) to read | 1395 | * @size: size (in bytes) to read |
1413 | * | 1396 | * @gfp: page allocation flag |
1397 | * | ||
1414 | * Reads a specified block, and returns buffer head that contains it. | 1398 | * Reads a specified block, and returns buffer head that contains it. |
1399 | * The page cache can be allocated from non-movable area | ||
1400 | * not to prevent page migration if you set gfp to zero. | ||
1415 | * It returns NULL if the block was unreadable. | 1401 | * It returns NULL if the block was unreadable. |
1416 | */ | 1402 | */ |
1417 | struct buffer_head * | 1403 | struct buffer_head * |
1418 | __bread(struct block_device *bdev, sector_t block, unsigned size) | 1404 | __bread_gfp(struct block_device *bdev, sector_t block, |
1405 | unsigned size, gfp_t gfp) | ||
1419 | { | 1406 | { |
1420 | struct buffer_head *bh = __getblk(bdev, block, size); | 1407 | struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); |
1421 | 1408 | ||
1422 | if (likely(bh) && !buffer_uptodate(bh)) | 1409 | if (likely(bh) && !buffer_uptodate(bh)) |
1423 | bh = __bread_slow(bh); | 1410 | bh = __bread_slow(bh); |
1424 | return bh; | 1411 | return bh; |
1425 | } | 1412 | } |
1426 | EXPORT_SYMBOL(__bread); | 1413 | EXPORT_SYMBOL(__bread_gfp); |
1427 | 1414 | ||
1428 | /* | 1415 | /* |
1429 | * invalidate_bh_lrus() is called rarely - but not only at unmount. | 1416 | * invalidate_bh_lrus() is called rarely - but not only at unmount. |
@@ -2082,6 +2069,7 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
2082 | struct page *page, void *fsdata) | 2069 | struct page *page, void *fsdata) |
2083 | { | 2070 | { |
2084 | struct inode *inode = mapping->host; | 2071 | struct inode *inode = mapping->host; |
2072 | loff_t old_size = inode->i_size; | ||
2085 | int i_size_changed = 0; | 2073 | int i_size_changed = 0; |
2086 | 2074 | ||
2087 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 2075 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
@@ -2101,6 +2089,8 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
2101 | unlock_page(page); | 2089 | unlock_page(page); |
2102 | page_cache_release(page); | 2090 | page_cache_release(page); |
2103 | 2091 | ||
2092 | if (old_size < pos) | ||
2093 | pagecache_isize_extended(inode, old_size, pos); | ||
2104 | /* | 2094 | /* |
2105 | * Don't mark the inode dirty under page lock. First, it unnecessarily | 2095 | * Don't mark the inode dirty under page lock. First, it unnecessarily |
2106 | * makes the holding time of page lock longer. Second, it forces lock | 2096 | * makes the holding time of page lock longer. Second, it forces lock |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 659f2ea9e6f7..cefca661464b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2638 | 2638 | ||
2639 | for (i = 0; i < CEPH_CAP_BITS; i++) | 2639 | for (i = 0; i < CEPH_CAP_BITS; i++) |
2640 | if ((dirty & (1 << i)) && | 2640 | if ((dirty & (1 << i)) && |
2641 | flush_tid == ci->i_cap_flush_tid[i]) | 2641 | (u16)flush_tid == ci->i_cap_flush_tid[i]) |
2642 | cleaned |= 1 << i; | 2642 | cleaned |= 1 << i; |
2643 | 2643 | ||
2644 | dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," | 2644 | dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," |
diff --git a/fs/dcache.c b/fs/dcache.c index d5a23fd0da90..3ffef7f4e5cd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2673,11 +2673,13 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
2673 | if (!IS_ROOT(new)) { | 2673 | if (!IS_ROOT(new)) { |
2674 | spin_unlock(&inode->i_lock); | 2674 | spin_unlock(&inode->i_lock); |
2675 | dput(new); | 2675 | dput(new); |
2676 | iput(inode); | ||
2676 | return ERR_PTR(-EIO); | 2677 | return ERR_PTR(-EIO); |
2677 | } | 2678 | } |
2678 | if (d_ancestor(new, dentry)) { | 2679 | if (d_ancestor(new, dentry)) { |
2679 | spin_unlock(&inode->i_lock); | 2680 | spin_unlock(&inode->i_lock); |
2680 | dput(new); | 2681 | dput(new); |
2682 | iput(inode); | ||
2681 | return ERR_PTR(-EIO); | 2683 | return ERR_PTR(-EIO); |
2682 | } | 2684 | } |
2683 | write_seqlock(&rename_lock); | 2685 | write_seqlock(&rename_lock); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1b119d3bf924..c4cd1fd86cc2 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; | 567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; |
568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; | 568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; |
569 | s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; | ||
570 | |||
571 | rc = -EINVAL; | ||
572 | if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { | ||
573 | pr_err("eCryptfs: maximum fs stacking depth exceeded\n"); | ||
574 | goto out_free; | ||
575 | } | ||
569 | 576 | ||
570 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); | 577 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); |
571 | rc = PTR_ERR(inode); | 578 | rc = PTR_ERR(inode); |
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index 389ba8312d5d..b47c7b8dc275 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild | |||
@@ -4,7 +4,7 @@ | |||
4 | # Copyright (C) 2008 Panasas Inc. All rights reserved. | 4 | # Copyright (C) 2008 Panasas Inc. All rights reserved. |
5 | # | 5 | # |
6 | # Authors: | 6 | # Authors: |
7 | # Boaz Harrosh <bharrosh@panasas.com> | 7 | # Boaz Harrosh <ooo@electrozaur.com> |
8 | # | 8 | # |
9 | # This program is free software; you can redistribute it and/or modify | 9 | # This program is free software; you can redistribute it and/or modify |
10 | # it under the terms of the GNU General Public License version 2 | 10 | # it under the terms of the GNU General Public License version 2 |
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index 3bbd46956d77..7d88ef566213 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
@@ -4,7 +4,7 @@ | |||
4 | * Copyright (C) 2005, 2006 | 4 | * Copyright (C) 2005, 2006 |
5 | * Avishay Traeger (avishay@gmail.com) | 5 | * Avishay Traeger (avishay@gmail.com) |
6 | * Copyright (C) 2008, 2009 | 6 | * Copyright (C) 2008, 2009 |
7 | * Boaz Harrosh <bharrosh@panasas.com> | 7 | * Boaz Harrosh <ooo@electrozaur.com> |
8 | * | 8 | * |
9 | * Copyrights for code taken from ext2: | 9 | * Copyrights for code taken from ext2: |
10 | * Copyright (C) 1992, 1993, 1994, 1995 | 10 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 49f51ab4caac..d7defd557601 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index fffe86fd7a42..ad9cac670a47 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 71bf8e4fb5d4..1a376b42d305 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 3f9cafd73931..f1d3d4eb8c4f 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 4731fd991efe..28907460e8fa 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index cfc0205d62c4..7bd8ac8dfb28 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * This file is part of exofs. | 7 | * This file is part of exofs. |
8 | * | 8 | * |
@@ -29,7 +29,7 @@ | |||
29 | 29 | ||
30 | #include "ore_raid.h" | 30 | #include "ore_raid.h" |
31 | 31 | ||
32 | MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); | 32 | MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>"); |
33 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); | 33 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); |
34 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
35 | 35 | ||
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 84529b8a331b..27cbdb697649 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2011 | 2 | * Copyright (C) 2011 |
3 | * Boaz Harrosh <bharrosh@panasas.com> | 3 | * Boaz Harrosh <ooo@electrozaur.com> |
4 | * | 4 | * |
5 | * This file is part of the objects raid engine (ore). | 5 | * This file is part of the objects raid engine (ore). |
6 | * | 6 | * |
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h index cf6375d82129..a6e746775570 100644 --- a/fs/exofs/ore_raid.h +++ b/fs/exofs/ore_raid.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) from 2011 | 2 | * Copyright (C) from 2011 |
3 | * Boaz Harrosh <bharrosh@panasas.com> | 3 | * Boaz Harrosh <ooo@electrozaur.com> |
4 | * | 4 | * |
5 | * This file is part of the objects raid engine (ore). | 5 | * This file is part of the objects raid engine (ore). |
6 | * | 6 | * |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index ed73ed8ebbee..95965503afcb 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c index 4dd687c3e747..832e2624b80b 100644 --- a/fs/exofs/symlink.c +++ b/fs/exofs/symlink.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
6 | * | 6 | * |
7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index 1b4f2f95fc37..5e6a2c0a1f0b 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2012 | 2 | * Copyright (C) 2012 |
3 | * Sachin Bhamare <sbhamare@panasas.com> | 3 | * Sachin Bhamare <sbhamare@panasas.com> |
4 | * Boaz Harrosh <bharrosh@panasas.com> | 4 | * Boaz Harrosh <ooo@electrozaur.com> |
5 | * | 5 | * |
6 | * This file is part of exofs. | 6 | * This file is part of exofs. |
7 | * | 7 | * |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 7015db0bafd1..eb742d0e67ff 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -1354,13 +1354,6 @@ set_qf_format: | |||
1354 | "not specified."); | 1354 | "not specified."); |
1355 | return 0; | 1355 | return 0; |
1356 | } | 1356 | } |
1357 | } else { | ||
1358 | if (sbi->s_jquota_fmt) { | ||
1359 | ext3_msg(sb, KERN_ERR, "error: journaled quota format " | ||
1360 | "specified with no journaling " | ||
1361 | "enabled."); | ||
1362 | return 0; | ||
1363 | } | ||
1364 | } | 1357 | } |
1365 | #endif | 1358 | #endif |
1366 | return 1; | 1359 | return 1; |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 581ef40fbe90..83a6f497c4e0 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb, | |||
176 | } | 176 | } |
177 | 177 | ||
178 | /* Initializes an uninitialized block bitmap */ | 178 | /* Initializes an uninitialized block bitmap */ |
179 | static void ext4_init_block_bitmap(struct super_block *sb, | 179 | static int ext4_init_block_bitmap(struct super_block *sb, |
180 | struct buffer_head *bh, | 180 | struct buffer_head *bh, |
181 | ext4_group_t block_group, | 181 | ext4_group_t block_group, |
182 | struct ext4_group_desc *gdp) | 182 | struct ext4_group_desc *gdp) |
@@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb, | |||
192 | /* If checksum is bad mark all blocks used to prevent allocation | 192 | /* If checksum is bad mark all blocks used to prevent allocation |
193 | * essentially implementing a per-group read-only flag. */ | 193 | * essentially implementing a per-group read-only flag. */ |
194 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 194 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
195 | ext4_error(sb, "Checksum bad for group %u", block_group); | ||
196 | grp = ext4_get_group_info(sb, block_group); | 195 | grp = ext4_get_group_info(sb, block_group); |
197 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | 196 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) |
198 | percpu_counter_sub(&sbi->s_freeclusters_counter, | 197 | percpu_counter_sub(&sbi->s_freeclusters_counter, |
@@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb, | |||
205 | count); | 204 | count); |
206 | } | 205 | } |
207 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | 206 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
208 | return; | 207 | return -EIO; |
209 | } | 208 | } |
210 | memset(bh->b_data, 0, sb->s_blocksize); | 209 | memset(bh->b_data, 0, sb->s_blocksize); |
211 | 210 | ||
@@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb, | |||
243 | sb->s_blocksize * 8, bh->b_data); | 242 | sb->s_blocksize * 8, bh->b_data); |
244 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); | 243 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); |
245 | ext4_group_desc_csum_set(sb, block_group, gdp); | 244 | ext4_group_desc_csum_set(sb, block_group, gdp); |
245 | return 0; | ||
246 | } | 246 | } |
247 | 247 | ||
248 | /* Return the number of free blocks in a block group. It is used when | 248 | /* Return the number of free blocks in a block group. It is used when |
@@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) | |||
438 | } | 438 | } |
439 | ext4_lock_group(sb, block_group); | 439 | ext4_lock_group(sb, block_group); |
440 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 440 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
441 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 441 | int err; |
442 | |||
443 | err = ext4_init_block_bitmap(sb, bh, block_group, desc); | ||
442 | set_bitmap_uptodate(bh); | 444 | set_bitmap_uptodate(bh); |
443 | set_buffer_uptodate(bh); | 445 | set_buffer_uptodate(bh); |
444 | ext4_unlock_group(sb, block_group); | 446 | ext4_unlock_group(sb, block_group); |
445 | unlock_buffer(bh); | 447 | unlock_buffer(bh); |
448 | if (err) | ||
449 | ext4_error(sb, "Checksum bad for grp %u", block_group); | ||
446 | return bh; | 450 | return bh; |
447 | } | 451 | } |
448 | ext4_unlock_group(sb, block_group); | 452 | ext4_unlock_group(sb, block_group); |
@@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
636 | * Account for the allocated meta blocks. We will never | 640 | * Account for the allocated meta blocks. We will never |
637 | * fail EDQUOT for metdata, but we do account for it. | 641 | * fail EDQUOT for metdata, but we do account for it. |
638 | */ | 642 | */ |
639 | if (!(*errp) && | 643 | if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { |
640 | ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { | ||
641 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 644 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
642 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 645 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
643 | dquot_alloc_block_nofail(inode, | 646 | dquot_alloc_block_nofail(inode, |
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 3285aa5a706a..b610779a958c 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | |||
24 | __u32 provided, calculated; | 24 | __u32 provided, calculated; |
25 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 25 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
26 | 26 | ||
27 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 27 | if (!ext4_has_metadata_csum(sb)) |
28 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
29 | return 1; | 28 | return 1; |
30 | 29 | ||
31 | provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); | 30 | provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); |
@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | |||
46 | __u32 csum; | 45 | __u32 csum; |
47 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 46 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
48 | 47 | ||
49 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 48 | if (!ext4_has_metadata_csum(sb)) |
50 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
51 | return; | 49 | return; |
52 | 50 | ||
53 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | 51 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); |
@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | |||
65 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 63 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
66 | int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; | 64 | int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; |
67 | 65 | ||
68 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 66 | if (!ext4_has_metadata_csum(sb)) |
69 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
70 | return 1; | 67 | return 1; |
71 | 68 | ||
72 | provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); | 69 | provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); |
@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | |||
91 | __u32 csum; | 88 | __u32 csum; |
92 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 89 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
93 | 90 | ||
94 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 91 | if (!ext4_has_metadata_csum(sb)) |
95 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
96 | return; | 92 | return; |
97 | 93 | ||
98 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | 94 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 0bb3f9ea0832..c24143ea9c08 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
151 | &file->f_ra, file, | 151 | &file->f_ra, file, |
152 | index, 1); | 152 | index, 1); |
153 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 153 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
154 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); | 154 | bh = ext4_bread(NULL, inode, map.m_lblk, 0); |
155 | if (IS_ERR(bh)) | ||
156 | return PTR_ERR(bh); | ||
155 | } | 157 | } |
156 | 158 | ||
157 | /* | ||
158 | * We ignore I/O errors on directories so users have a chance | ||
159 | * of recovering data when there's a bad sector | ||
160 | */ | ||
161 | if (!bh) { | 159 | if (!bh) { |
162 | if (!dir_has_error) { | 160 | if (!dir_has_error) { |
163 | EXT4_ERROR_FILE(file, 0, | 161 | EXT4_ERROR_FILE(file, 0, |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b0c225cdb52c..c55a1faaed58 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -572,15 +572,15 @@ enum { | |||
572 | 572 | ||
573 | /* | 573 | /* |
574 | * The bit position of these flags must not overlap with any of the | 574 | * The bit position of these flags must not overlap with any of the |
575 | * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(), | 575 | * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(), |
576 | * read_extent_tree_block(), ext4_split_extent_at(), | 576 | * read_extent_tree_block(), ext4_split_extent_at(), |
577 | * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). | 577 | * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). |
578 | * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be | 578 | * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be |
579 | * caching the extents when reading from the extent tree while a | 579 | * caching the extents when reading from the extent tree while a |
580 | * truncate or punch hole operation is in progress. | 580 | * truncate or punch hole operation is in progress. |
581 | */ | 581 | */ |
582 | #define EXT4_EX_NOCACHE 0x0400 | 582 | #define EXT4_EX_NOCACHE 0x40000000 |
583 | #define EXT4_EX_FORCE_CACHE 0x0800 | 583 | #define EXT4_EX_FORCE_CACHE 0x20000000 |
584 | 584 | ||
585 | /* | 585 | /* |
586 | * Flags used by ext4_free_blocks | 586 | * Flags used by ext4_free_blocks |
@@ -890,6 +890,7 @@ struct ext4_inode_info { | |||
890 | struct ext4_es_tree i_es_tree; | 890 | struct ext4_es_tree i_es_tree; |
891 | rwlock_t i_es_lock; | 891 | rwlock_t i_es_lock; |
892 | struct list_head i_es_lru; | 892 | struct list_head i_es_lru; |
893 | unsigned int i_es_all_nr; /* protected by i_es_lock */ | ||
893 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 894 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ |
894 | unsigned long i_touch_when; /* jiffies of last accessing */ | 895 | unsigned long i_touch_when; /* jiffies of last accessing */ |
895 | 896 | ||
@@ -1174,6 +1175,9 @@ struct ext4_super_block { | |||
1174 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 | 1175 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 |
1175 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ | 1176 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ |
1176 | 1177 | ||
1178 | /* Number of quota types we support */ | ||
1179 | #define EXT4_MAXQUOTAS 2 | ||
1180 | |||
1177 | /* | 1181 | /* |
1178 | * fourth extended-fs super-block data in memory | 1182 | * fourth extended-fs super-block data in memory |
1179 | */ | 1183 | */ |
@@ -1237,7 +1241,7 @@ struct ext4_sb_info { | |||
1237 | u32 s_min_batch_time; | 1241 | u32 s_min_batch_time; |
1238 | struct block_device *journal_bdev; | 1242 | struct block_device *journal_bdev; |
1239 | #ifdef CONFIG_QUOTA | 1243 | #ifdef CONFIG_QUOTA |
1240 | char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ | 1244 | char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ |
1241 | int s_jquota_fmt; /* Format of quota to use */ | 1245 | int s_jquota_fmt; /* Format of quota to use */ |
1242 | #endif | 1246 | #endif |
1243 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | 1247 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ |
@@ -1330,8 +1334,7 @@ struct ext4_sb_info { | |||
1330 | /* Reclaim extents from extent status tree */ | 1334 | /* Reclaim extents from extent status tree */ |
1331 | struct shrinker s_es_shrinker; | 1335 | struct shrinker s_es_shrinker; |
1332 | struct list_head s_es_lru; | 1336 | struct list_head s_es_lru; |
1333 | unsigned long s_es_last_sorted; | 1337 | struct ext4_es_stats s_es_stats; |
1334 | struct percpu_counter s_extent_cache_cnt; | ||
1335 | struct mb_cache *s_mb_cache; | 1338 | struct mb_cache *s_mb_cache; |
1336 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1339 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; |
1337 | 1340 | ||
@@ -1399,7 +1402,6 @@ enum { | |||
1399 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ | 1402 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ |
1400 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | 1403 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
1401 | EXT4_STATE_NEWENTRY, /* File just added to dir */ | 1404 | EXT4_STATE_NEWENTRY, /* File just added to dir */ |
1402 | EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ | ||
1403 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read | 1405 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read |
1404 | nolocking */ | 1406 | nolocking */ |
1405 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ | 1407 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ |
@@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
2086 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); | 2088 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
2087 | 2089 | ||
2088 | /* inode.c */ | 2090 | /* inode.c */ |
2089 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 2091 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); |
2090 | ext4_lblk_t, int, int *); | 2092 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); |
2091 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | ||
2092 | ext4_lblk_t, int, int *); | ||
2093 | int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2093 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
2094 | struct buffer_head *bh_result, int create); | 2094 | struct buffer_head *bh_result, int create); |
2095 | int ext4_get_block(struct inode *inode, sector_t iblock, | 2095 | int ext4_get_block(struct inode *inode, sector_t iblock, |
@@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle, | |||
2109 | #define CONVERT_INLINE_DATA 2 | 2109 | #define CONVERT_INLINE_DATA 2 |
2110 | 2110 | ||
2111 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 2111 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
2112 | extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); | ||
2112 | extern int ext4_write_inode(struct inode *, struct writeback_control *); | 2113 | extern int ext4_write_inode(struct inode *, struct writeback_control *); |
2113 | extern int ext4_setattr(struct dentry *, struct iattr *); | 2114 | extern int ext4_setattr(struct dentry *, struct iattr *); |
2114 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 2115 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
@@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb, | |||
2332 | static inline int ext4_has_group_desc_csum(struct super_block *sb) | 2333 | static inline int ext4_has_group_desc_csum(struct super_block *sb) |
2333 | { | 2334 | { |
2334 | return EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2335 | return EXT4_HAS_RO_COMPAT_FEATURE(sb, |
2335 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM | | 2336 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || |
2336 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); | 2337 | (EXT4_SB(sb)->s_chksum_driver != NULL); |
2337 | } | 2338 | } |
2338 | 2339 | ||
2340 | static inline int ext4_has_metadata_csum(struct super_block *sb) | ||
2341 | { | ||
2342 | WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2343 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
2344 | !EXT4_SB(sb)->s_chksum_driver); | ||
2345 | |||
2346 | return (EXT4_SB(sb)->s_chksum_driver != NULL); | ||
2347 | } | ||
2339 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) | 2348 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) |
2340 | { | 2349 | { |
2341 | return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | | 2350 | return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | |
@@ -2731,21 +2740,26 @@ extern int ext4_can_extents_be_merged(struct inode *inode, | |||
2731 | struct ext4_extent *ex1, | 2740 | struct ext4_extent *ex1, |
2732 | struct ext4_extent *ex2); | 2741 | struct ext4_extent *ex2); |
2733 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, | 2742 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, |
2734 | struct ext4_ext_path *, | 2743 | struct ext4_ext_path **, |
2735 | struct ext4_extent *, int); | 2744 | struct ext4_extent *, int); |
2736 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 2745 | extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, |
2737 | struct ext4_ext_path *, | 2746 | struct ext4_ext_path **, |
2738 | int flags); | 2747 | int flags); |
2739 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 2748 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
2740 | extern int ext4_ext_check_inode(struct inode *inode); | 2749 | extern int ext4_ext_check_inode(struct inode *inode); |
2741 | extern int ext4_find_delalloc_range(struct inode *inode, | 2750 | extern int ext4_find_delalloc_range(struct inode *inode, |
2742 | ext4_lblk_t lblk_start, | 2751 | ext4_lblk_t lblk_start, |
2743 | ext4_lblk_t lblk_end); | 2752 | ext4_lblk_t lblk_end); |
2744 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); | 2753 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); |
2754 | extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); | ||
2745 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2755 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2746 | __u64 start, __u64 len); | 2756 | __u64 start, __u64 len); |
2747 | extern int ext4_ext_precache(struct inode *inode); | 2757 | extern int ext4_ext_precache(struct inode *inode); |
2748 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); | 2758 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); |
2759 | extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, | ||
2760 | struct inode *inode2, ext4_lblk_t lblk1, | ||
2761 | ext4_lblk_t lblk2, ext4_lblk_t count, | ||
2762 | int mark_unwritten,int *err); | ||
2749 | 2763 | ||
2750 | /* move_extent.c */ | 2764 | /* move_extent.c */ |
2751 | extern void ext4_double_down_write_data_sem(struct inode *first, | 2765 | extern void ext4_double_down_write_data_sem(struct inode *first, |
@@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode, | |||
2755 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | 2769 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, |
2756 | __u64 start_orig, __u64 start_donor, | 2770 | __u64 start_orig, __u64 start_donor, |
2757 | __u64 len, __u64 *moved_len); | 2771 | __u64 len, __u64 *moved_len); |
2758 | extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
2759 | struct ext4_extent **extent); | ||
2760 | 2772 | ||
2761 | /* page-io.c */ | 2773 | /* page-io.c */ |
2762 | extern int __init ext4_init_pageio(void); | 2774 | extern int __init ext4_init_pageio(void); |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index a867f5ca9991..3c9381547094 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh) | |||
123 | struct ext4_ext_path { | 123 | struct ext4_ext_path { |
124 | ext4_fsblk_t p_block; | 124 | ext4_fsblk_t p_block; |
125 | __u16 p_depth; | 125 | __u16 p_depth; |
126 | __u16 p_maxdepth; | ||
126 | struct ext4_extent *p_ext; | 127 | struct ext4_extent *p_ext; |
127 | struct ext4_extent_idx *p_idx; | 128 | struct ext4_extent_idx *p_idx; |
128 | struct ext4_extent_header *p_hdr; | 129 | struct ext4_extent_header *p_hdr; |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0074e0d23d6e..3445035c7e01 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
256 | set_buffer_prio(bh); | 256 | set_buffer_prio(bh); |
257 | if (ext4_handle_valid(handle)) { | 257 | if (ext4_handle_valid(handle)) { |
258 | err = jbd2_journal_dirty_metadata(handle, bh); | 258 | err = jbd2_journal_dirty_metadata(handle, bh); |
259 | /* Errors can only happen if there is a bug */ | 259 | /* Errors can only happen due to aborted journal or a nasty bug */ |
260 | if (WARN_ON_ONCE(err)) { | 260 | if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) { |
261 | ext4_journal_abort_handle(where, line, __func__, bh, | 261 | ext4_journal_abort_handle(where, line, __func__, bh, |
262 | handle, err); | 262 | handle, err); |
263 | if (inode == NULL) { | 263 | if (inode == NULL) { |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 17c00ff202f2..9c5b49fb281e 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -102,9 +102,9 @@ | |||
102 | #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 | 102 | #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 |
103 | #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 | 103 | #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 |
104 | #endif | 104 | #endif |
105 | #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 105 | #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) |
106 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) | 106 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) |
107 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) | 107 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) |
108 | 108 | ||
109 | static inline int ext4_jbd2_credits_xattr(struct inode *inode) | 109 | static inline int ext4_jbd2_credits_xattr(struct inode *inode) |
110 | { | 110 | { |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74292a71b384..0b16fb4c06d3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode, | |||
73 | { | 73 | { |
74 | struct ext4_extent_tail *et; | 74 | struct ext4_extent_tail *et; |
75 | 75 | ||
76 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 76 | if (!ext4_has_metadata_csum(inode->i_sb)) |
77 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
78 | return 1; | 77 | return 1; |
79 | 78 | ||
80 | et = find_ext4_extent_tail(eh); | 79 | et = find_ext4_extent_tail(eh); |
@@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode, | |||
88 | { | 87 | { |
89 | struct ext4_extent_tail *et; | 88 | struct ext4_extent_tail *et; |
90 | 89 | ||
91 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 90 | if (!ext4_has_metadata_csum(inode->i_sb)) |
92 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
93 | return; | 91 | return; |
94 | 92 | ||
95 | et = find_ext4_extent_tail(eh); | 93 | et = find_ext4_extent_tail(eh); |
@@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode, | |||
98 | 96 | ||
99 | static int ext4_split_extent(handle_t *handle, | 97 | static int ext4_split_extent(handle_t *handle, |
100 | struct inode *inode, | 98 | struct inode *inode, |
101 | struct ext4_ext_path *path, | 99 | struct ext4_ext_path **ppath, |
102 | struct ext4_map_blocks *map, | 100 | struct ext4_map_blocks *map, |
103 | int split_flag, | 101 | int split_flag, |
104 | int flags); | 102 | int flags); |
105 | 103 | ||
106 | static int ext4_split_extent_at(handle_t *handle, | 104 | static int ext4_split_extent_at(handle_t *handle, |
107 | struct inode *inode, | 105 | struct inode *inode, |
108 | struct ext4_ext_path *path, | 106 | struct ext4_ext_path **ppath, |
109 | ext4_lblk_t split, | 107 | ext4_lblk_t split, |
110 | int split_flag, | 108 | int split_flag, |
111 | int flags); | 109 | int flags); |
@@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
291 | return size; | 289 | return size; |
292 | } | 290 | } |
293 | 291 | ||
292 | static inline int | ||
293 | ext4_force_split_extent_at(handle_t *handle, struct inode *inode, | ||
294 | struct ext4_ext_path **ppath, ext4_lblk_t lblk, | ||
295 | int nofail) | ||
296 | { | ||
297 | struct ext4_ext_path *path = *ppath; | ||
298 | int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); | ||
299 | |||
300 | return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? | ||
301 | EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, | ||
302 | EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | | ||
303 | (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); | ||
304 | } | ||
305 | |||
294 | /* | 306 | /* |
295 | * Calculate the number of metadata blocks needed | 307 | * Calculate the number of metadata blocks needed |
296 | * to allocate @blocks | 308 | * to allocate @blocks |
@@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, | |||
695 | 707 | ||
696 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 708 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
697 | { | 709 | { |
698 | int depth = path->p_depth; | 710 | int depth, i; |
699 | int i; | ||
700 | 711 | ||
712 | if (!path) | ||
713 | return; | ||
714 | depth = path->p_depth; | ||
701 | for (i = 0; i <= depth; i++, path++) | 715 | for (i = 0; i <= depth; i++, path++) |
702 | if (path->p_bh) { | 716 | if (path->p_bh) { |
703 | brelse(path->p_bh); | 717 | brelse(path->p_bh); |
@@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
841 | } | 855 | } |
842 | 856 | ||
843 | struct ext4_ext_path * | 857 | struct ext4_ext_path * |
844 | ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | 858 | ext4_find_extent(struct inode *inode, ext4_lblk_t block, |
845 | struct ext4_ext_path *path, int flags) | 859 | struct ext4_ext_path **orig_path, int flags) |
846 | { | 860 | { |
847 | struct ext4_extent_header *eh; | 861 | struct ext4_extent_header *eh; |
848 | struct buffer_head *bh; | 862 | struct buffer_head *bh; |
849 | short int depth, i, ppos = 0, alloc = 0; | 863 | struct ext4_ext_path *path = orig_path ? *orig_path : NULL; |
864 | short int depth, i, ppos = 0; | ||
850 | int ret; | 865 | int ret; |
851 | 866 | ||
852 | eh = ext_inode_hdr(inode); | 867 | eh = ext_inode_hdr(inode); |
853 | depth = ext_depth(inode); | 868 | depth = ext_depth(inode); |
854 | 869 | ||
855 | /* account possible depth increase */ | 870 | if (path) { |
871 | ext4_ext_drop_refs(path); | ||
872 | if (depth > path[0].p_maxdepth) { | ||
873 | kfree(path); | ||
874 | *orig_path = path = NULL; | ||
875 | } | ||
876 | } | ||
856 | if (!path) { | 877 | if (!path) { |
878 | /* account possible depth increase */ | ||
857 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), | 879 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), |
858 | GFP_NOFS); | 880 | GFP_NOFS); |
859 | if (!path) | 881 | if (unlikely(!path)) |
860 | return ERR_PTR(-ENOMEM); | 882 | return ERR_PTR(-ENOMEM); |
861 | alloc = 1; | 883 | path[0].p_maxdepth = depth + 1; |
862 | } | 884 | } |
863 | path[0].p_hdr = eh; | 885 | path[0].p_hdr = eh; |
864 | path[0].p_bh = NULL; | 886 | path[0].p_bh = NULL; |
@@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
876 | 898 | ||
877 | bh = read_extent_tree_block(inode, path[ppos].p_block, --i, | 899 | bh = read_extent_tree_block(inode, path[ppos].p_block, --i, |
878 | flags); | 900 | flags); |
879 | if (IS_ERR(bh)) { | 901 | if (unlikely(IS_ERR(bh))) { |
880 | ret = PTR_ERR(bh); | 902 | ret = PTR_ERR(bh); |
881 | goto err; | 903 | goto err; |
882 | } | 904 | } |
@@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
910 | 932 | ||
911 | err: | 933 | err: |
912 | ext4_ext_drop_refs(path); | 934 | ext4_ext_drop_refs(path); |
913 | if (alloc) | 935 | kfree(path); |
914 | kfree(path); | 936 | if (orig_path) |
937 | *orig_path = NULL; | ||
915 | return ERR_PTR(ret); | 938 | return ERR_PTR(ret); |
916 | } | 939 | } |
917 | 940 | ||
@@ -1238,16 +1261,24 @@ cleanup: | |||
1238 | * just created block | 1261 | * just created block |
1239 | */ | 1262 | */ |
1240 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | 1263 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, |
1241 | unsigned int flags, | 1264 | unsigned int flags) |
1242 | struct ext4_extent *newext) | ||
1243 | { | 1265 | { |
1244 | struct ext4_extent_header *neh; | 1266 | struct ext4_extent_header *neh; |
1245 | struct buffer_head *bh; | 1267 | struct buffer_head *bh; |
1246 | ext4_fsblk_t newblock; | 1268 | ext4_fsblk_t newblock, goal = 0; |
1269 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
1247 | int err = 0; | 1270 | int err = 0; |
1248 | 1271 | ||
1249 | newblock = ext4_ext_new_meta_block(handle, inode, NULL, | 1272 | /* Try to prepend new index to old one */ |
1250 | newext, &err, flags); | 1273 | if (ext_depth(inode)) |
1274 | goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); | ||
1275 | if (goal > le32_to_cpu(es->s_first_data_block)) { | ||
1276 | flags |= EXT4_MB_HINT_TRY_GOAL; | ||
1277 | goal--; | ||
1278 | } else | ||
1279 | goal = ext4_inode_to_goal_block(inode); | ||
1280 | newblock = ext4_new_meta_blocks(handle, inode, goal, flags, | ||
1281 | NULL, &err); | ||
1251 | if (newblock == 0) | 1282 | if (newblock == 0) |
1252 | return err; | 1283 | return err; |
1253 | 1284 | ||
@@ -1314,9 +1345,10 @@ out: | |||
1314 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, | 1345 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, |
1315 | unsigned int mb_flags, | 1346 | unsigned int mb_flags, |
1316 | unsigned int gb_flags, | 1347 | unsigned int gb_flags, |
1317 | struct ext4_ext_path *path, | 1348 | struct ext4_ext_path **ppath, |
1318 | struct ext4_extent *newext) | 1349 | struct ext4_extent *newext) |
1319 | { | 1350 | { |
1351 | struct ext4_ext_path *path = *ppath; | ||
1320 | struct ext4_ext_path *curp; | 1352 | struct ext4_ext_path *curp; |
1321 | int depth, i, err = 0; | 1353 | int depth, i, err = 0; |
1322 | 1354 | ||
@@ -1340,23 +1372,21 @@ repeat: | |||
1340 | goto out; | 1372 | goto out; |
1341 | 1373 | ||
1342 | /* refill path */ | 1374 | /* refill path */ |
1343 | ext4_ext_drop_refs(path); | 1375 | path = ext4_find_extent(inode, |
1344 | path = ext4_ext_find_extent(inode, | ||
1345 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), | 1376 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
1346 | path, gb_flags); | 1377 | ppath, gb_flags); |
1347 | if (IS_ERR(path)) | 1378 | if (IS_ERR(path)) |
1348 | err = PTR_ERR(path); | 1379 | err = PTR_ERR(path); |
1349 | } else { | 1380 | } else { |
1350 | /* tree is full, time to grow in depth */ | 1381 | /* tree is full, time to grow in depth */ |
1351 | err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext); | 1382 | err = ext4_ext_grow_indepth(handle, inode, mb_flags); |
1352 | if (err) | 1383 | if (err) |
1353 | goto out; | 1384 | goto out; |
1354 | 1385 | ||
1355 | /* refill path */ | 1386 | /* refill path */ |
1356 | ext4_ext_drop_refs(path); | 1387 | path = ext4_find_extent(inode, |
1357 | path = ext4_ext_find_extent(inode, | ||
1358 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), | 1388 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
1359 | path, gb_flags); | 1389 | ppath, gb_flags); |
1360 | if (IS_ERR(path)) { | 1390 | if (IS_ERR(path)) { |
1361 | err = PTR_ERR(path); | 1391 | err = PTR_ERR(path); |
1362 | goto out; | 1392 | goto out; |
@@ -1559,7 +1589,7 @@ found_extent: | |||
1559 | * allocated block. Thus, index entries have to be consistent | 1589 | * allocated block. Thus, index entries have to be consistent |
1560 | * with leaves. | 1590 | * with leaves. |
1561 | */ | 1591 | */ |
1562 | static ext4_lblk_t | 1592 | ext4_lblk_t |
1563 | ext4_ext_next_allocated_block(struct ext4_ext_path *path) | 1593 | ext4_ext_next_allocated_block(struct ext4_ext_path *path) |
1564 | { | 1594 | { |
1565 | int depth; | 1595 | int depth; |
@@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, | |||
1802 | sizeof(struct ext4_extent_idx); | 1832 | sizeof(struct ext4_extent_idx); |
1803 | s += sizeof(struct ext4_extent_header); | 1833 | s += sizeof(struct ext4_extent_header); |
1804 | 1834 | ||
1835 | path[1].p_maxdepth = path[0].p_maxdepth; | ||
1805 | memcpy(path[0].p_hdr, path[1].p_hdr, s); | 1836 | memcpy(path[0].p_hdr, path[1].p_hdr, s); |
1806 | path[0].p_depth = 0; | 1837 | path[0].p_depth = 0; |
1807 | path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + | 1838 | path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + |
@@ -1896,9 +1927,10 @@ out: | |||
1896 | * creating new leaf in the no-space case. | 1927 | * creating new leaf in the no-space case. |
1897 | */ | 1928 | */ |
1898 | int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | 1929 | int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, |
1899 | struct ext4_ext_path *path, | 1930 | struct ext4_ext_path **ppath, |
1900 | struct ext4_extent *newext, int gb_flags) | 1931 | struct ext4_extent *newext, int gb_flags) |
1901 | { | 1932 | { |
1933 | struct ext4_ext_path *path = *ppath; | ||
1902 | struct ext4_extent_header *eh; | 1934 | struct ext4_extent_header *eh; |
1903 | struct ext4_extent *ex, *fex; | 1935 | struct ext4_extent *ex, *fex; |
1904 | struct ext4_extent *nearex; /* nearest extent */ | 1936 | struct ext4_extent *nearex; /* nearest extent */ |
@@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1907 | ext4_lblk_t next; | 1939 | ext4_lblk_t next; |
1908 | int mb_flags = 0, unwritten; | 1940 | int mb_flags = 0, unwritten; |
1909 | 1941 | ||
1942 | if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
1943 | mb_flags |= EXT4_MB_DELALLOC_RESERVED; | ||
1910 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1944 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
1911 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1945 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
1912 | return -EIO; | 1946 | return -EIO; |
@@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1925 | /* | 1959 | /* |
1926 | * Try to see whether we should rather test the extent on | 1960 | * Try to see whether we should rather test the extent on |
1927 | * right from ex, or from the left of ex. This is because | 1961 | * right from ex, or from the left of ex. This is because |
1928 | * ext4_ext_find_extent() can return either extent on the | 1962 | * ext4_find_extent() can return either extent on the |
1929 | * left, or on the right from the searched position. This | 1963 | * left, or on the right from the searched position. This |
1930 | * will make merging more effective. | 1964 | * will make merging more effective. |
1931 | */ | 1965 | */ |
@@ -2008,7 +2042,7 @@ prepend: | |||
2008 | if (next != EXT_MAX_BLOCKS) { | 2042 | if (next != EXT_MAX_BLOCKS) { |
2009 | ext_debug("next leaf block - %u\n", next); | 2043 | ext_debug("next leaf block - %u\n", next); |
2010 | BUG_ON(npath != NULL); | 2044 | BUG_ON(npath != NULL); |
2011 | npath = ext4_ext_find_extent(inode, next, NULL, 0); | 2045 | npath = ext4_find_extent(inode, next, NULL, 0); |
2012 | if (IS_ERR(npath)) | 2046 | if (IS_ERR(npath)) |
2013 | return PTR_ERR(npath); | 2047 | return PTR_ERR(npath); |
2014 | BUG_ON(npath->p_depth != path->p_depth); | 2048 | BUG_ON(npath->p_depth != path->p_depth); |
@@ -2028,9 +2062,9 @@ prepend: | |||
2028 | * We're gonna add a new leaf in the tree. | 2062 | * We're gonna add a new leaf in the tree. |
2029 | */ | 2063 | */ |
2030 | if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) | 2064 | if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) |
2031 | mb_flags = EXT4_MB_USE_RESERVED; | 2065 | mb_flags |= EXT4_MB_USE_RESERVED; |
2032 | err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, | 2066 | err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, |
2033 | path, newext); | 2067 | ppath, newext); |
2034 | if (err) | 2068 | if (err) |
2035 | goto cleanup; | 2069 | goto cleanup; |
2036 | depth = ext_depth(inode); | 2070 | depth = ext_depth(inode); |
@@ -2108,10 +2142,8 @@ merge: | |||
2108 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | 2142 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
2109 | 2143 | ||
2110 | cleanup: | 2144 | cleanup: |
2111 | if (npath) { | 2145 | ext4_ext_drop_refs(npath); |
2112 | ext4_ext_drop_refs(npath); | 2146 | kfree(npath); |
2113 | kfree(npath); | ||
2114 | } | ||
2115 | return err; | 2147 | return err; |
2116 | } | 2148 | } |
2117 | 2149 | ||
@@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
2133 | /* find extent for this block */ | 2165 | /* find extent for this block */ |
2134 | down_read(&EXT4_I(inode)->i_data_sem); | 2166 | down_read(&EXT4_I(inode)->i_data_sem); |
2135 | 2167 | ||
2136 | if (path && ext_depth(inode) != depth) { | 2168 | path = ext4_find_extent(inode, block, &path, 0); |
2137 | /* depth was changed. we have to realloc path */ | ||
2138 | kfree(path); | ||
2139 | path = NULL; | ||
2140 | } | ||
2141 | |||
2142 | path = ext4_ext_find_extent(inode, block, path, 0); | ||
2143 | if (IS_ERR(path)) { | 2169 | if (IS_ERR(path)) { |
2144 | up_read(&EXT4_I(inode)->i_data_sem); | 2170 | up_read(&EXT4_I(inode)->i_data_sem); |
2145 | err = PTR_ERR(path); | 2171 | err = PTR_ERR(path); |
@@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
2156 | } | 2182 | } |
2157 | ex = path[depth].p_ext; | 2183 | ex = path[depth].p_ext; |
2158 | next = ext4_ext_next_allocated_block(path); | 2184 | next = ext4_ext_next_allocated_block(path); |
2159 | ext4_ext_drop_refs(path); | ||
2160 | 2185 | ||
2161 | flags = 0; | 2186 | flags = 0; |
2162 | exists = 0; | 2187 | exists = 0; |
@@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
2266 | block = es.es_lblk + es.es_len; | 2291 | block = es.es_lblk + es.es_len; |
2267 | } | 2292 | } |
2268 | 2293 | ||
2269 | if (path) { | 2294 | ext4_ext_drop_refs(path); |
2270 | ext4_ext_drop_refs(path); | 2295 | kfree(path); |
2271 | kfree(path); | ||
2272 | } | ||
2273 | |||
2274 | return err; | 2296 | return err; |
2275 | } | 2297 | } |
2276 | 2298 | ||
@@ -2826,7 +2848,7 @@ again: | |||
2826 | ext4_lblk_t ee_block; | 2848 | ext4_lblk_t ee_block; |
2827 | 2849 | ||
2828 | /* find extent for this block */ | 2850 | /* find extent for this block */ |
2829 | path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); | 2851 | path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); |
2830 | if (IS_ERR(path)) { | 2852 | if (IS_ERR(path)) { |
2831 | ext4_journal_stop(handle); | 2853 | ext4_journal_stop(handle); |
2832 | return PTR_ERR(path); | 2854 | return PTR_ERR(path); |
@@ -2854,24 +2876,14 @@ again: | |||
2854 | */ | 2876 | */ |
2855 | if (end >= ee_block && | 2877 | if (end >= ee_block && |
2856 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { | 2878 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { |
2857 | int split_flag = 0; | ||
2858 | |||
2859 | if (ext4_ext_is_unwritten(ex)) | ||
2860 | split_flag = EXT4_EXT_MARK_UNWRIT1 | | ||
2861 | EXT4_EXT_MARK_UNWRIT2; | ||
2862 | |||
2863 | /* | 2879 | /* |
2864 | * Split the extent in two so that 'end' is the last | 2880 | * Split the extent in two so that 'end' is the last |
2865 | * block in the first new extent. Also we should not | 2881 | * block in the first new extent. Also we should not |
2866 | * fail removing space due to ENOSPC so try to use | 2882 | * fail removing space due to ENOSPC so try to use |
2867 | * reserved block if that happens. | 2883 | * reserved block if that happens. |
2868 | */ | 2884 | */ |
2869 | err = ext4_split_extent_at(handle, inode, path, | 2885 | err = ext4_force_split_extent_at(handle, inode, &path, |
2870 | end + 1, split_flag, | 2886 | end + 1, 1); |
2871 | EXT4_EX_NOCACHE | | ||
2872 | EXT4_GET_BLOCKS_PRE_IO | | ||
2873 | EXT4_GET_BLOCKS_METADATA_NOFAIL); | ||
2874 | |||
2875 | if (err < 0) | 2887 | if (err < 0) |
2876 | goto out; | 2888 | goto out; |
2877 | } | 2889 | } |
@@ -2893,7 +2905,7 @@ again: | |||
2893 | ext4_journal_stop(handle); | 2905 | ext4_journal_stop(handle); |
2894 | return -ENOMEM; | 2906 | return -ENOMEM; |
2895 | } | 2907 | } |
2896 | path[0].p_depth = depth; | 2908 | path[0].p_maxdepth = path[0].p_depth = depth; |
2897 | path[0].p_hdr = ext_inode_hdr(inode); | 2909 | path[0].p_hdr = ext_inode_hdr(inode); |
2898 | i = 0; | 2910 | i = 0; |
2899 | 2911 | ||
@@ -3013,10 +3025,9 @@ again: | |||
3013 | out: | 3025 | out: |
3014 | ext4_ext_drop_refs(path); | 3026 | ext4_ext_drop_refs(path); |
3015 | kfree(path); | 3027 | kfree(path); |
3016 | if (err == -EAGAIN) { | 3028 | path = NULL; |
3017 | path = NULL; | 3029 | if (err == -EAGAIN) |
3018 | goto again; | 3030 | goto again; |
3019 | } | ||
3020 | ext4_journal_stop(handle); | 3031 | ext4_journal_stop(handle); |
3021 | 3032 | ||
3022 | return err; | 3033 | return err; |
@@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
3130 | */ | 3141 | */ |
3131 | static int ext4_split_extent_at(handle_t *handle, | 3142 | static int ext4_split_extent_at(handle_t *handle, |
3132 | struct inode *inode, | 3143 | struct inode *inode, |
3133 | struct ext4_ext_path *path, | 3144 | struct ext4_ext_path **ppath, |
3134 | ext4_lblk_t split, | 3145 | ext4_lblk_t split, |
3135 | int split_flag, | 3146 | int split_flag, |
3136 | int flags) | 3147 | int flags) |
3137 | { | 3148 | { |
3149 | struct ext4_ext_path *path = *ppath; | ||
3138 | ext4_fsblk_t newblock; | 3150 | ext4_fsblk_t newblock; |
3139 | ext4_lblk_t ee_block; | 3151 | ext4_lblk_t ee_block; |
3140 | struct ext4_extent *ex, newex, orig_ex, zero_ex; | 3152 | struct ext4_extent *ex, newex, orig_ex, zero_ex; |
@@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle, | |||
3205 | if (split_flag & EXT4_EXT_MARK_UNWRIT2) | 3217 | if (split_flag & EXT4_EXT_MARK_UNWRIT2) |
3206 | ext4_ext_mark_unwritten(ex2); | 3218 | ext4_ext_mark_unwritten(ex2); |
3207 | 3219 | ||
3208 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3220 | err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); |
3209 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 3221 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { |
3210 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { | 3222 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { |
3211 | if (split_flag & EXT4_EXT_DATA_VALID1) { | 3223 | if (split_flag & EXT4_EXT_DATA_VALID1) { |
@@ -3271,11 +3283,12 @@ fix_extent_len: | |||
3271 | */ | 3283 | */ |
3272 | static int ext4_split_extent(handle_t *handle, | 3284 | static int ext4_split_extent(handle_t *handle, |
3273 | struct inode *inode, | 3285 | struct inode *inode, |
3274 | struct ext4_ext_path *path, | 3286 | struct ext4_ext_path **ppath, |
3275 | struct ext4_map_blocks *map, | 3287 | struct ext4_map_blocks *map, |
3276 | int split_flag, | 3288 | int split_flag, |
3277 | int flags) | 3289 | int flags) |
3278 | { | 3290 | { |
3291 | struct ext4_ext_path *path = *ppath; | ||
3279 | ext4_lblk_t ee_block; | 3292 | ext4_lblk_t ee_block; |
3280 | struct ext4_extent *ex; | 3293 | struct ext4_extent *ex; |
3281 | unsigned int ee_len, depth; | 3294 | unsigned int ee_len, depth; |
@@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle, | |||
3298 | EXT4_EXT_MARK_UNWRIT2; | 3311 | EXT4_EXT_MARK_UNWRIT2; |
3299 | if (split_flag & EXT4_EXT_DATA_VALID2) | 3312 | if (split_flag & EXT4_EXT_DATA_VALID2) |
3300 | split_flag1 |= EXT4_EXT_DATA_VALID1; | 3313 | split_flag1 |= EXT4_EXT_DATA_VALID1; |
3301 | err = ext4_split_extent_at(handle, inode, path, | 3314 | err = ext4_split_extent_at(handle, inode, ppath, |
3302 | map->m_lblk + map->m_len, split_flag1, flags1); | 3315 | map->m_lblk + map->m_len, split_flag1, flags1); |
3303 | if (err) | 3316 | if (err) |
3304 | goto out; | 3317 | goto out; |
@@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle, | |||
3309 | * Update path is required because previous ext4_split_extent_at() may | 3322 | * Update path is required because previous ext4_split_extent_at() may |
3310 | * result in split of original leaf or extent zeroout. | 3323 | * result in split of original leaf or extent zeroout. |
3311 | */ | 3324 | */ |
3312 | ext4_ext_drop_refs(path); | 3325 | path = ext4_find_extent(inode, map->m_lblk, ppath, 0); |
3313 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | ||
3314 | if (IS_ERR(path)) | 3326 | if (IS_ERR(path)) |
3315 | return PTR_ERR(path); | 3327 | return PTR_ERR(path); |
3316 | depth = ext_depth(inode); | 3328 | depth = ext_depth(inode); |
@@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle, | |||
3330 | split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | | 3342 | split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | |
3331 | EXT4_EXT_MARK_UNWRIT2); | 3343 | EXT4_EXT_MARK_UNWRIT2); |
3332 | } | 3344 | } |
3333 | err = ext4_split_extent_at(handle, inode, path, | 3345 | err = ext4_split_extent_at(handle, inode, ppath, |
3334 | map->m_lblk, split_flag1, flags); | 3346 | map->m_lblk, split_flag1, flags); |
3335 | if (err) | 3347 | if (err) |
3336 | goto out; | 3348 | goto out; |
@@ -3364,9 +3376,10 @@ out: | |||
3364 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 3376 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
3365 | struct inode *inode, | 3377 | struct inode *inode, |
3366 | struct ext4_map_blocks *map, | 3378 | struct ext4_map_blocks *map, |
3367 | struct ext4_ext_path *path, | 3379 | struct ext4_ext_path **ppath, |
3368 | int flags) | 3380 | int flags) |
3369 | { | 3381 | { |
3382 | struct ext4_ext_path *path = *ppath; | ||
3370 | struct ext4_sb_info *sbi; | 3383 | struct ext4_sb_info *sbi; |
3371 | struct ext4_extent_header *eh; | 3384 | struct ext4_extent_header *eh; |
3372 | struct ext4_map_blocks split_map; | 3385 | struct ext4_map_blocks split_map; |
@@ -3590,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3590 | } | 3603 | } |
3591 | } | 3604 | } |
3592 | 3605 | ||
3593 | allocated = ext4_split_extent(handle, inode, path, | 3606 | err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, |
3594 | &split_map, split_flag, flags); | 3607 | flags); |
3595 | if (allocated < 0) | 3608 | if (err > 0) |
3596 | err = allocated; | 3609 | err = 0; |
3597 | |||
3598 | out: | 3610 | out: |
3599 | /* If we have gotten a failure, don't zero out status tree */ | 3611 | /* If we have gotten a failure, don't zero out status tree */ |
3600 | if (!err) | 3612 | if (!err) |
@@ -3629,9 +3641,10 @@ out: | |||
3629 | static int ext4_split_convert_extents(handle_t *handle, | 3641 | static int ext4_split_convert_extents(handle_t *handle, |
3630 | struct inode *inode, | 3642 | struct inode *inode, |
3631 | struct ext4_map_blocks *map, | 3643 | struct ext4_map_blocks *map, |
3632 | struct ext4_ext_path *path, | 3644 | struct ext4_ext_path **ppath, |
3633 | int flags) | 3645 | int flags) |
3634 | { | 3646 | { |
3647 | struct ext4_ext_path *path = *ppath; | ||
3635 | ext4_lblk_t eof_block; | 3648 | ext4_lblk_t eof_block; |
3636 | ext4_lblk_t ee_block; | 3649 | ext4_lblk_t ee_block; |
3637 | struct ext4_extent *ex; | 3650 | struct ext4_extent *ex; |
@@ -3665,74 +3678,15 @@ static int ext4_split_convert_extents(handle_t *handle, | |||
3665 | split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); | 3678 | split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); |
3666 | } | 3679 | } |
3667 | flags |= EXT4_GET_BLOCKS_PRE_IO; | 3680 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
3668 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); | 3681 | return ext4_split_extent(handle, inode, ppath, map, split_flag, flags); |
3669 | } | 3682 | } |
3670 | 3683 | ||
3671 | static int ext4_convert_initialized_extents(handle_t *handle, | ||
3672 | struct inode *inode, | ||
3673 | struct ext4_map_blocks *map, | ||
3674 | struct ext4_ext_path *path) | ||
3675 | { | ||
3676 | struct ext4_extent *ex; | ||
3677 | ext4_lblk_t ee_block; | ||
3678 | unsigned int ee_len; | ||
3679 | int depth; | ||
3680 | int err = 0; | ||
3681 | |||
3682 | depth = ext_depth(inode); | ||
3683 | ex = path[depth].p_ext; | ||
3684 | ee_block = le32_to_cpu(ex->ee_block); | ||
3685 | ee_len = ext4_ext_get_actual_len(ex); | ||
3686 | |||
3687 | ext_debug("%s: inode %lu, logical" | ||
3688 | "block %llu, max_blocks %u\n", __func__, inode->i_ino, | ||
3689 | (unsigned long long)ee_block, ee_len); | ||
3690 | |||
3691 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
3692 | err = ext4_split_convert_extents(handle, inode, map, path, | ||
3693 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); | ||
3694 | if (err < 0) | ||
3695 | goto out; | ||
3696 | ext4_ext_drop_refs(path); | ||
3697 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | ||
3698 | if (IS_ERR(path)) { | ||
3699 | err = PTR_ERR(path); | ||
3700 | goto out; | ||
3701 | } | ||
3702 | depth = ext_depth(inode); | ||
3703 | ex = path[depth].p_ext; | ||
3704 | if (!ex) { | ||
3705 | EXT4_ERROR_INODE(inode, "unexpected hole at %lu", | ||
3706 | (unsigned long) map->m_lblk); | ||
3707 | err = -EIO; | ||
3708 | goto out; | ||
3709 | } | ||
3710 | } | ||
3711 | |||
3712 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3713 | if (err) | ||
3714 | goto out; | ||
3715 | /* first mark the extent as unwritten */ | ||
3716 | ext4_ext_mark_unwritten(ex); | ||
3717 | |||
3718 | /* note: ext4_ext_correct_indexes() isn't needed here because | ||
3719 | * borders are not changed | ||
3720 | */ | ||
3721 | ext4_ext_try_to_merge(handle, inode, path, ex); | ||
3722 | |||
3723 | /* Mark modified extent as dirty */ | ||
3724 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | ||
3725 | out: | ||
3726 | ext4_ext_show_leaf(inode, path); | ||
3727 | return err; | ||
3728 | } | ||
3729 | |||
3730 | |||
3731 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3684 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
3732 | struct inode *inode, | 3685 | struct inode *inode, |
3733 | struct ext4_map_blocks *map, | 3686 | struct ext4_map_blocks *map, |
3734 | struct ext4_ext_path *path) | 3687 | struct ext4_ext_path **ppath) |
3735 | { | 3688 | { |
3689 | struct ext4_ext_path *path = *ppath; | ||
3736 | struct ext4_extent *ex; | 3690 | struct ext4_extent *ex; |
3737 | ext4_lblk_t ee_block; | 3691 | ext4_lblk_t ee_block; |
3738 | unsigned int ee_len; | 3692 | unsigned int ee_len; |
@@ -3761,16 +3715,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3761 | inode->i_ino, (unsigned long long)ee_block, ee_len, | 3715 | inode->i_ino, (unsigned long long)ee_block, ee_len, |
3762 | (unsigned long long)map->m_lblk, map->m_len); | 3716 | (unsigned long long)map->m_lblk, map->m_len); |
3763 | #endif | 3717 | #endif |
3764 | err = ext4_split_convert_extents(handle, inode, map, path, | 3718 | err = ext4_split_convert_extents(handle, inode, map, ppath, |
3765 | EXT4_GET_BLOCKS_CONVERT); | 3719 | EXT4_GET_BLOCKS_CONVERT); |
3766 | if (err < 0) | 3720 | if (err < 0) |
3767 | goto out; | 3721 | return err; |
3768 | ext4_ext_drop_refs(path); | 3722 | path = ext4_find_extent(inode, map->m_lblk, ppath, 0); |
3769 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | 3723 | if (IS_ERR(path)) |
3770 | if (IS_ERR(path)) { | 3724 | return PTR_ERR(path); |
3771 | err = PTR_ERR(path); | ||
3772 | goto out; | ||
3773 | } | ||
3774 | depth = ext_depth(inode); | 3725 | depth = ext_depth(inode); |
3775 | ex = path[depth].p_ext; | 3726 | ex = path[depth].p_ext; |
3776 | } | 3727 | } |
@@ -3963,12 +3914,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
3963 | } | 3914 | } |
3964 | 3915 | ||
3965 | static int | 3916 | static int |
3966 | ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, | 3917 | convert_initialized_extent(handle_t *handle, struct inode *inode, |
3967 | struct ext4_map_blocks *map, | 3918 | struct ext4_map_blocks *map, |
3968 | struct ext4_ext_path *path, int flags, | 3919 | struct ext4_ext_path **ppath, int flags, |
3969 | unsigned int allocated, ext4_fsblk_t newblock) | 3920 | unsigned int allocated, ext4_fsblk_t newblock) |
3970 | { | 3921 | { |
3971 | int ret = 0; | 3922 | struct ext4_ext_path *path = *ppath; |
3923 | struct ext4_extent *ex; | ||
3924 | ext4_lblk_t ee_block; | ||
3925 | unsigned int ee_len; | ||
3926 | int depth; | ||
3972 | int err = 0; | 3927 | int err = 0; |
3973 | 3928 | ||
3974 | /* | 3929 | /* |
@@ -3978,28 +3933,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, | |||
3978 | if (map->m_len > EXT_UNWRITTEN_MAX_LEN) | 3933 | if (map->m_len > EXT_UNWRITTEN_MAX_LEN) |
3979 | map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; | 3934 | map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; |
3980 | 3935 | ||
3981 | ret = ext4_convert_initialized_extents(handle, inode, map, | 3936 | depth = ext_depth(inode); |
3982 | path); | 3937 | ex = path[depth].p_ext; |
3983 | if (ret >= 0) { | 3938 | ee_block = le32_to_cpu(ex->ee_block); |
3984 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3939 | ee_len = ext4_ext_get_actual_len(ex); |
3985 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | 3940 | |
3986 | path, map->m_len); | 3941 | ext_debug("%s: inode %lu, logical" |
3987 | } else | 3942 | "block %llu, max_blocks %u\n", __func__, inode->i_ino, |
3988 | err = ret; | 3943 | (unsigned long long)ee_block, ee_len); |
3944 | |||
3945 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
3946 | err = ext4_split_convert_extents(handle, inode, map, ppath, | ||
3947 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); | ||
3948 | if (err < 0) | ||
3949 | return err; | ||
3950 | path = ext4_find_extent(inode, map->m_lblk, ppath, 0); | ||
3951 | if (IS_ERR(path)) | ||
3952 | return PTR_ERR(path); | ||
3953 | depth = ext_depth(inode); | ||
3954 | ex = path[depth].p_ext; | ||
3955 | if (!ex) { | ||
3956 | EXT4_ERROR_INODE(inode, "unexpected hole at %lu", | ||
3957 | (unsigned long) map->m_lblk); | ||
3958 | return -EIO; | ||
3959 | } | ||
3960 | } | ||
3961 | |||
3962 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3963 | if (err) | ||
3964 | return err; | ||
3965 | /* first mark the extent as unwritten */ | ||
3966 | ext4_ext_mark_unwritten(ex); | ||
3967 | |||
3968 | /* note: ext4_ext_correct_indexes() isn't needed here because | ||
3969 | * borders are not changed | ||
3970 | */ | ||
3971 | ext4_ext_try_to_merge(handle, inode, path, ex); | ||
3972 | |||
3973 | /* Mark modified extent as dirty */ | ||
3974 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | ||
3975 | if (err) | ||
3976 | return err; | ||
3977 | ext4_ext_show_leaf(inode, path); | ||
3978 | |||
3979 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
3980 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len); | ||
3981 | if (err) | ||
3982 | return err; | ||
3989 | map->m_flags |= EXT4_MAP_UNWRITTEN; | 3983 | map->m_flags |= EXT4_MAP_UNWRITTEN; |
3990 | if (allocated > map->m_len) | 3984 | if (allocated > map->m_len) |
3991 | allocated = map->m_len; | 3985 | allocated = map->m_len; |
3992 | map->m_len = allocated; | 3986 | map->m_len = allocated; |
3993 | 3987 | return allocated; | |
3994 | return err ? err : allocated; | ||
3995 | } | 3988 | } |
3996 | 3989 | ||
3997 | static int | 3990 | static int |
3998 | ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | 3991 | ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, |
3999 | struct ext4_map_blocks *map, | 3992 | struct ext4_map_blocks *map, |
4000 | struct ext4_ext_path *path, int flags, | 3993 | struct ext4_ext_path **ppath, int flags, |
4001 | unsigned int allocated, ext4_fsblk_t newblock) | 3994 | unsigned int allocated, ext4_fsblk_t newblock) |
4002 | { | 3995 | { |
3996 | struct ext4_ext_path *path = *ppath; | ||
4003 | int ret = 0; | 3997 | int ret = 0; |
4004 | int err = 0; | 3998 | int err = 0; |
4005 | ext4_io_end_t *io = ext4_inode_aio(inode); | 3999 | ext4_io_end_t *io = ext4_inode_aio(inode); |
@@ -4021,8 +4015,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | |||
4021 | 4015 | ||
4022 | /* get_block() before submit the IO, split the extent */ | 4016 | /* get_block() before submit the IO, split the extent */ |
4023 | if (flags & EXT4_GET_BLOCKS_PRE_IO) { | 4017 | if (flags & EXT4_GET_BLOCKS_PRE_IO) { |
4024 | ret = ext4_split_convert_extents(handle, inode, map, | 4018 | ret = ext4_split_convert_extents(handle, inode, map, ppath, |
4025 | path, flags | EXT4_GET_BLOCKS_CONVERT); | 4019 | flags | EXT4_GET_BLOCKS_CONVERT); |
4026 | if (ret <= 0) | 4020 | if (ret <= 0) |
4027 | goto out; | 4021 | goto out; |
4028 | /* | 4022 | /* |
@@ -4040,7 +4034,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | |||
4040 | /* IO end_io complete, convert the filled extent to written */ | 4034 | /* IO end_io complete, convert the filled extent to written */ |
4041 | if (flags & EXT4_GET_BLOCKS_CONVERT) { | 4035 | if (flags & EXT4_GET_BLOCKS_CONVERT) { |
4042 | ret = ext4_convert_unwritten_extents_endio(handle, inode, map, | 4036 | ret = ext4_convert_unwritten_extents_endio(handle, inode, map, |
4043 | path); | 4037 | ppath); |
4044 | if (ret >= 0) { | 4038 | if (ret >= 0) { |
4045 | ext4_update_inode_fsync_trans(handle, inode, 1); | 4039 | ext4_update_inode_fsync_trans(handle, inode, 1); |
4046 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | 4040 | err = check_eofblocks_fl(handle, inode, map->m_lblk, |
@@ -4078,7 +4072,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | |||
4078 | } | 4072 | } |
4079 | 4073 | ||
4080 | /* buffered write, writepage time, convert*/ | 4074 | /* buffered write, writepage time, convert*/ |
4081 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags); | 4075 | ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags); |
4082 | if (ret >= 0) | 4076 | if (ret >= 0) |
4083 | ext4_update_inode_fsync_trans(handle, inode, 1); | 4077 | ext4_update_inode_fsync_trans(handle, inode, 1); |
4084 | out: | 4078 | out: |
@@ -4279,7 +4273,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4279 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 4273 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
4280 | 4274 | ||
4281 | /* find extent for this block */ | 4275 | /* find extent for this block */ |
4282 | path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0); | 4276 | path = ext4_find_extent(inode, map->m_lblk, NULL, 0); |
4283 | if (IS_ERR(path)) { | 4277 | if (IS_ERR(path)) { |
4284 | err = PTR_ERR(path); | 4278 | err = PTR_ERR(path); |
4285 | path = NULL; | 4279 | path = NULL; |
@@ -4291,7 +4285,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4291 | /* | 4285 | /* |
4292 | * consistent leaf must not be empty; | 4286 | * consistent leaf must not be empty; |
4293 | * this situation is possible, though, _during_ tree modification; | 4287 | * this situation is possible, though, _during_ tree modification; |
4294 | * this is why assert can't be put in ext4_ext_find_extent() | 4288 | * this is why assert can't be put in ext4_find_extent() |
4295 | */ | 4289 | */ |
4296 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { | 4290 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { |
4297 | EXT4_ERROR_INODE(inode, "bad extent address " | 4291 | EXT4_ERROR_INODE(inode, "bad extent address " |
@@ -4331,15 +4325,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4331 | */ | 4325 | */ |
4332 | if ((!ext4_ext_is_unwritten(ex)) && | 4326 | if ((!ext4_ext_is_unwritten(ex)) && |
4333 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { | 4327 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { |
4334 | allocated = ext4_ext_convert_initialized_extent( | 4328 | allocated = convert_initialized_extent( |
4335 | handle, inode, map, path, flags, | 4329 | handle, inode, map, &path, |
4336 | allocated, newblock); | 4330 | flags, allocated, newblock); |
4337 | goto out2; | 4331 | goto out2; |
4338 | } else if (!ext4_ext_is_unwritten(ex)) | 4332 | } else if (!ext4_ext_is_unwritten(ex)) |
4339 | goto out; | 4333 | goto out; |
4340 | 4334 | ||
4341 | ret = ext4_ext_handle_unwritten_extents( | 4335 | ret = ext4_ext_handle_unwritten_extents( |
4342 | handle, inode, map, path, flags, | 4336 | handle, inode, map, &path, flags, |
4343 | allocated, newblock); | 4337 | allocated, newblock); |
4344 | if (ret < 0) | 4338 | if (ret < 0) |
4345 | err = ret; | 4339 | err = ret; |
@@ -4376,7 +4370,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4376 | 4370 | ||
4377 | /* | 4371 | /* |
4378 | * If we are doing bigalloc, check to see if the extent returned | 4372 | * If we are doing bigalloc, check to see if the extent returned |
4379 | * by ext4_ext_find_extent() implies a cluster we can use. | 4373 | * by ext4_find_extent() implies a cluster we can use. |
4380 | */ | 4374 | */ |
4381 | if (cluster_offset && ex && | 4375 | if (cluster_offset && ex && |
4382 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { | 4376 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { |
@@ -4451,6 +4445,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4451 | ar.flags = 0; | 4445 | ar.flags = 0; |
4452 | if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) | 4446 | if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) |
4453 | ar.flags |= EXT4_MB_HINT_NOPREALLOC; | 4447 | ar.flags |= EXT4_MB_HINT_NOPREALLOC; |
4448 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
4449 | ar.flags |= EXT4_MB_DELALLOC_RESERVED; | ||
4454 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 4450 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
4455 | if (!newblock) | 4451 | if (!newblock) |
4456 | goto out2; | 4452 | goto out2; |
@@ -4486,7 +4482,7 @@ got_allocated_blocks: | |||
4486 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | 4482 | err = check_eofblocks_fl(handle, inode, map->m_lblk, |
4487 | path, ar.len); | 4483 | path, ar.len); |
4488 | if (!err) | 4484 | if (!err) |
4489 | err = ext4_ext_insert_extent(handle, inode, path, | 4485 | err = ext4_ext_insert_extent(handle, inode, &path, |
4490 | &newex, flags); | 4486 | &newex, flags); |
4491 | 4487 | ||
4492 | if (!err && set_unwritten) { | 4488 | if (!err && set_unwritten) { |
@@ -4619,10 +4615,8 @@ out: | |||
4619 | map->m_pblk = newblock; | 4615 | map->m_pblk = newblock; |
4620 | map->m_len = allocated; | 4616 | map->m_len = allocated; |
4621 | out2: | 4617 | out2: |
4622 | if (path) { | 4618 | ext4_ext_drop_refs(path); |
4623 | ext4_ext_drop_refs(path); | 4619 | kfree(path); |
4624 | kfree(path); | ||
4625 | } | ||
4626 | 4620 | ||
4627 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4621 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4628 | err ? err : allocated); | 4622 | err ? err : allocated); |
@@ -4799,7 +4793,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4799 | max_blocks -= lblk; | 4793 | max_blocks -= lblk; |
4800 | 4794 | ||
4801 | flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | | 4795 | flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | |
4802 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; | 4796 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | |
4797 | EXT4_EX_NOCACHE; | ||
4803 | if (mode & FALLOC_FL_KEEP_SIZE) | 4798 | if (mode & FALLOC_FL_KEEP_SIZE) |
4804 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | 4799 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; |
4805 | 4800 | ||
@@ -4837,15 +4832,21 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4837 | ext4_inode_block_unlocked_dio(inode); | 4832 | ext4_inode_block_unlocked_dio(inode); |
4838 | inode_dio_wait(inode); | 4833 | inode_dio_wait(inode); |
4839 | 4834 | ||
4835 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | ||
4836 | flags, mode); | ||
4837 | if (ret) | ||
4838 | goto out_dio; | ||
4840 | /* | 4839 | /* |
4841 | * Remove entire range from the extent status tree. | 4840 | * Remove entire range from the extent status tree. |
4841 | * | ||
4842 | * ext4_es_remove_extent(inode, lblk, max_blocks) is | ||
4843 | * NOT sufficient. I'm not sure why this is the case, | ||
4844 | * but let's be conservative and remove the extent | ||
4845 | * status tree for the entire inode. There should be | ||
4846 | * no outstanding delalloc extents thanks to the | ||
4847 | * filemap_write_and_wait_range() call above. | ||
4842 | */ | 4848 | */ |
4843 | ret = ext4_es_remove_extent(inode, lblk, max_blocks); | 4849 | ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
4844 | if (ret) | ||
4845 | goto out_dio; | ||
4846 | |||
4847 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | ||
4848 | flags, mode); | ||
4849 | if (ret) | 4850 | if (ret) |
4850 | goto out_dio; | 4851 | goto out_dio; |
4851 | } | 4852 | } |
@@ -5304,36 +5305,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
5304 | struct ext4_ext_path *path; | 5305 | struct ext4_ext_path *path; |
5305 | int ret = 0, depth; | 5306 | int ret = 0, depth; |
5306 | struct ext4_extent *extent; | 5307 | struct ext4_extent *extent; |
5307 | ext4_lblk_t stop_block, current_block; | 5308 | ext4_lblk_t stop_block; |
5308 | ext4_lblk_t ex_start, ex_end; | 5309 | ext4_lblk_t ex_start, ex_end; |
5309 | 5310 | ||
5310 | /* Let path point to the last extent */ | 5311 | /* Let path point to the last extent */ |
5311 | path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); | 5312 | path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); |
5312 | if (IS_ERR(path)) | 5313 | if (IS_ERR(path)) |
5313 | return PTR_ERR(path); | 5314 | return PTR_ERR(path); |
5314 | 5315 | ||
5315 | depth = path->p_depth; | 5316 | depth = path->p_depth; |
5316 | extent = path[depth].p_ext; | 5317 | extent = path[depth].p_ext; |
5317 | if (!extent) { | 5318 | if (!extent) |
5318 | ext4_ext_drop_refs(path); | 5319 | goto out; |
5319 | kfree(path); | ||
5320 | return ret; | ||
5321 | } | ||
5322 | 5320 | ||
5323 | stop_block = le32_to_cpu(extent->ee_block) + | 5321 | stop_block = le32_to_cpu(extent->ee_block) + |
5324 | ext4_ext_get_actual_len(extent); | 5322 | ext4_ext_get_actual_len(extent); |
5325 | ext4_ext_drop_refs(path); | ||
5326 | kfree(path); | ||
5327 | 5323 | ||
5328 | /* Nothing to shift, if hole is at the end of file */ | 5324 | /* Nothing to shift, if hole is at the end of file */ |
5329 | if (start >= stop_block) | 5325 | if (start >= stop_block) |
5330 | return ret; | 5326 | goto out; |
5331 | 5327 | ||
5332 | /* | 5328 | /* |
5333 | * Don't start shifting extents until we make sure the hole is big | 5329 | * Don't start shifting extents until we make sure the hole is big |
5334 | * enough to accomodate the shift. | 5330 | * enough to accomodate the shift. |
5335 | */ | 5331 | */ |
5336 | path = ext4_ext_find_extent(inode, start - 1, NULL, 0); | 5332 | path = ext4_find_extent(inode, start - 1, &path, 0); |
5337 | if (IS_ERR(path)) | 5333 | if (IS_ERR(path)) |
5338 | return PTR_ERR(path); | 5334 | return PTR_ERR(path); |
5339 | depth = path->p_depth; | 5335 | depth = path->p_depth; |
@@ -5346,8 +5342,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
5346 | ex_start = 0; | 5342 | ex_start = 0; |
5347 | ex_end = 0; | 5343 | ex_end = 0; |
5348 | } | 5344 | } |
5349 | ext4_ext_drop_refs(path); | ||
5350 | kfree(path); | ||
5351 | 5345 | ||
5352 | if ((start == ex_start && shift > ex_start) || | 5346 | if ((start == ex_start && shift > ex_start) || |
5353 | (shift > start - ex_end)) | 5347 | (shift > start - ex_end)) |
@@ -5355,7 +5349,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
5355 | 5349 | ||
5356 | /* Its safe to start updating extents */ | 5350 | /* Its safe to start updating extents */ |
5357 | while (start < stop_block) { | 5351 | while (start < stop_block) { |
5358 | path = ext4_ext_find_extent(inode, start, NULL, 0); | 5352 | path = ext4_find_extent(inode, start, &path, 0); |
5359 | if (IS_ERR(path)) | 5353 | if (IS_ERR(path)) |
5360 | return PTR_ERR(path); | 5354 | return PTR_ERR(path); |
5361 | depth = path->p_depth; | 5355 | depth = path->p_depth; |
@@ -5365,27 +5359,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
5365 | (unsigned long) start); | 5359 | (unsigned long) start); |
5366 | return -EIO; | 5360 | return -EIO; |
5367 | } | 5361 | } |
5368 | 5362 | if (start > le32_to_cpu(extent->ee_block)) { | |
5369 | current_block = le32_to_cpu(extent->ee_block); | ||
5370 | if (start > current_block) { | ||
5371 | /* Hole, move to the next extent */ | 5363 | /* Hole, move to the next extent */ |
5372 | ret = mext_next_extent(inode, path, &extent); | 5364 | if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) { |
5373 | if (ret != 0) { | 5365 | path[depth].p_ext++; |
5374 | ext4_ext_drop_refs(path); | 5366 | } else { |
5375 | kfree(path); | 5367 | start = ext4_ext_next_allocated_block(path); |
5376 | if (ret == 1) | 5368 | continue; |
5377 | ret = 0; | ||
5378 | break; | ||
5379 | } | 5369 | } |
5380 | } | 5370 | } |
5381 | ret = ext4_ext_shift_path_extents(path, shift, inode, | 5371 | ret = ext4_ext_shift_path_extents(path, shift, inode, |
5382 | handle, &start); | 5372 | handle, &start); |
5383 | ext4_ext_drop_refs(path); | ||
5384 | kfree(path); | ||
5385 | if (ret) | 5373 | if (ret) |
5386 | break; | 5374 | break; |
5387 | } | 5375 | } |
5388 | 5376 | out: | |
5377 | ext4_ext_drop_refs(path); | ||
5378 | kfree(path); | ||
5389 | return ret; | 5379 | return ret; |
5390 | } | 5380 | } |
5391 | 5381 | ||
@@ -5508,3 +5498,199 @@ out_mutex: | |||
5508 | mutex_unlock(&inode->i_mutex); | 5498 | mutex_unlock(&inode->i_mutex); |
5509 | return ret; | 5499 | return ret; |
5510 | } | 5500 | } |
5501 | |||
5502 | /** | ||
5503 | * ext4_swap_extents - Swap extents between two inodes | ||
5504 | * | ||
5505 | * @inode1: First inode | ||
5506 | * @inode2: Second inode | ||
5507 | * @lblk1: Start block for first inode | ||
5508 | * @lblk2: Start block for second inode | ||
5509 | * @count: Number of blocks to swap | ||
5510 | * @mark_unwritten: Mark second inode's extents as unwritten after swap | ||
5511 | * @erp: Pointer to save error value | ||
5512 | * | ||
5513 | * This helper routine does exactly what is promise "swap extents". All other | ||
5514 | * stuff such as page-cache locking consistency, bh mapping consistency or | ||
5515 | * extent's data copying must be performed by caller. | ||
5516 | * Locking: | ||
5517 | * i_mutex is held for both inodes | ||
5518 | * i_data_sem is locked for write for both inodes | ||
5519 | * Assumptions: | ||
5520 | * All pages from requested range are locked for both inodes | ||
5521 | */ | ||
5522 | int | ||
5523 | ext4_swap_extents(handle_t *handle, struct inode *inode1, | ||
5524 | struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, | ||
5525 | ext4_lblk_t count, int unwritten, int *erp) | ||
5526 | { | ||
5527 | struct ext4_ext_path *path1 = NULL; | ||
5528 | struct ext4_ext_path *path2 = NULL; | ||
5529 | int replaced_count = 0; | ||
5530 | |||
5531 | BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem)); | ||
5532 | BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem)); | ||
5533 | BUG_ON(!mutex_is_locked(&inode1->i_mutex)); | ||
5534 | BUG_ON(!mutex_is_locked(&inode1->i_mutex)); | ||
5535 | |||
5536 | *erp = ext4_es_remove_extent(inode1, lblk1, count); | ||
5537 | if (unlikely(*erp)) | ||
5538 | return 0; | ||
5539 | *erp = ext4_es_remove_extent(inode2, lblk2, count); | ||
5540 | if (unlikely(*erp)) | ||
5541 | return 0; | ||
5542 | |||
5543 | while (count) { | ||
5544 | struct ext4_extent *ex1, *ex2, tmp_ex; | ||
5545 | ext4_lblk_t e1_blk, e2_blk; | ||
5546 | int e1_len, e2_len, len; | ||
5547 | int split = 0; | ||
5548 | |||
5549 | path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); | ||
5550 | if (unlikely(IS_ERR(path1))) { | ||
5551 | *erp = PTR_ERR(path1); | ||
5552 | path1 = NULL; | ||
5553 | finish: | ||
5554 | count = 0; | ||
5555 | goto repeat; | ||
5556 | } | ||
5557 | path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); | ||
5558 | if (unlikely(IS_ERR(path2))) { | ||
5559 | *erp = PTR_ERR(path2); | ||
5560 | path2 = NULL; | ||
5561 | goto finish; | ||
5562 | } | ||
5563 | ex1 = path1[path1->p_depth].p_ext; | ||
5564 | ex2 = path2[path2->p_depth].p_ext; | ||
5565 | /* Do we have somthing to swap ? */ | ||
5566 | if (unlikely(!ex2 || !ex1)) | ||
5567 | goto finish; | ||
5568 | |||
5569 | e1_blk = le32_to_cpu(ex1->ee_block); | ||
5570 | e2_blk = le32_to_cpu(ex2->ee_block); | ||
5571 | e1_len = ext4_ext_get_actual_len(ex1); | ||
5572 | e2_len = ext4_ext_get_actual_len(ex2); | ||
5573 | |||
5574 | /* Hole handling */ | ||
5575 | if (!in_range(lblk1, e1_blk, e1_len) || | ||
5576 | !in_range(lblk2, e2_blk, e2_len)) { | ||
5577 | ext4_lblk_t next1, next2; | ||
5578 | |||
5579 | /* if hole after extent, then go to next extent */ | ||
5580 | next1 = ext4_ext_next_allocated_block(path1); | ||
5581 | next2 = ext4_ext_next_allocated_block(path2); | ||
5582 | /* If hole before extent, then shift to that extent */ | ||
5583 | if (e1_blk > lblk1) | ||
5584 | next1 = e1_blk; | ||
5585 | if (e2_blk > lblk2) | ||
5586 | next2 = e1_blk; | ||
5587 | /* Do we have something to swap */ | ||
5588 | if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) | ||
5589 | goto finish; | ||
5590 | /* Move to the rightest boundary */ | ||
5591 | len = next1 - lblk1; | ||
5592 | if (len < next2 - lblk2) | ||
5593 | len = next2 - lblk2; | ||
5594 | if (len > count) | ||
5595 | len = count; | ||
5596 | lblk1 += len; | ||
5597 | lblk2 += len; | ||
5598 | count -= len; | ||
5599 | goto repeat; | ||
5600 | } | ||
5601 | |||
5602 | /* Prepare left boundary */ | ||
5603 | if (e1_blk < lblk1) { | ||
5604 | split = 1; | ||
5605 | *erp = ext4_force_split_extent_at(handle, inode1, | ||
5606 | &path1, lblk1, 0); | ||
5607 | if (unlikely(*erp)) | ||
5608 | goto finish; | ||
5609 | } | ||
5610 | if (e2_blk < lblk2) { | ||
5611 | split = 1; | ||
5612 | *erp = ext4_force_split_extent_at(handle, inode2, | ||
5613 | &path2, lblk2, 0); | ||
5614 | if (unlikely(*erp)) | ||
5615 | goto finish; | ||
5616 | } | ||
5617 | /* ext4_split_extent_at() may result in leaf extent split, | ||
5618 | * path must to be revalidated. */ | ||
5619 | if (split) | ||
5620 | goto repeat; | ||
5621 | |||
5622 | /* Prepare right boundary */ | ||
5623 | len = count; | ||
5624 | if (len > e1_blk + e1_len - lblk1) | ||
5625 | len = e1_blk + e1_len - lblk1; | ||
5626 | if (len > e2_blk + e2_len - lblk2) | ||
5627 | len = e2_blk + e2_len - lblk2; | ||
5628 | |||
5629 | if (len != e1_len) { | ||
5630 | split = 1; | ||
5631 | *erp = ext4_force_split_extent_at(handle, inode1, | ||
5632 | &path1, lblk1 + len, 0); | ||
5633 | if (unlikely(*erp)) | ||
5634 | goto finish; | ||
5635 | } | ||
5636 | if (len != e2_len) { | ||
5637 | split = 1; | ||
5638 | *erp = ext4_force_split_extent_at(handle, inode2, | ||
5639 | &path2, lblk2 + len, 0); | ||
5640 | if (*erp) | ||
5641 | goto finish; | ||
5642 | } | ||
5643 | /* ext4_split_extent_at() may result in leaf extent split, | ||
5644 | * path must to be revalidated. */ | ||
5645 | if (split) | ||
5646 | goto repeat; | ||
5647 | |||
5648 | BUG_ON(e2_len != e1_len); | ||
5649 | *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth); | ||
5650 | if (unlikely(*erp)) | ||
5651 | goto finish; | ||
5652 | *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth); | ||
5653 | if (unlikely(*erp)) | ||
5654 | goto finish; | ||
5655 | |||
5656 | /* Both extents are fully inside boundaries. Swap it now */ | ||
5657 | tmp_ex = *ex1; | ||
5658 | ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2)); | ||
5659 | ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex)); | ||
5660 | ex1->ee_len = cpu_to_le16(e2_len); | ||
5661 | ex2->ee_len = cpu_to_le16(e1_len); | ||
5662 | if (unwritten) | ||
5663 | ext4_ext_mark_unwritten(ex2); | ||
5664 | if (ext4_ext_is_unwritten(&tmp_ex)) | ||
5665 | ext4_ext_mark_unwritten(ex1); | ||
5666 | |||
5667 | ext4_ext_try_to_merge(handle, inode2, path2, ex2); | ||
5668 | ext4_ext_try_to_merge(handle, inode1, path1, ex1); | ||
5669 | *erp = ext4_ext_dirty(handle, inode2, path2 + | ||
5670 | path2->p_depth); | ||
5671 | if (unlikely(*erp)) | ||
5672 | goto finish; | ||
5673 | *erp = ext4_ext_dirty(handle, inode1, path1 + | ||
5674 | path1->p_depth); | ||
5675 | /* | ||
5676 | * Looks scarry ah..? second inode already points to new blocks, | ||
5677 | * and it was successfully dirtied. But luckily error may happen | ||
5678 | * only due to journal error, so full transaction will be | ||
5679 | * aborted anyway. | ||
5680 | */ | ||
5681 | if (unlikely(*erp)) | ||
5682 | goto finish; | ||
5683 | lblk1 += len; | ||
5684 | lblk2 += len; | ||
5685 | replaced_count += len; | ||
5686 | count -= len; | ||
5687 | |||
5688 | repeat: | ||
5689 | ext4_ext_drop_refs(path1); | ||
5690 | kfree(path1); | ||
5691 | ext4_ext_drop_refs(path2); | ||
5692 | kfree(path2); | ||
5693 | path1 = path2 = NULL; | ||
5694 | } | ||
5695 | return replaced_count; | ||
5696 | } | ||
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 0b7e28e7eaa4..94e7855ae71b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -11,6 +11,8 @@ | |||
11 | */ | 11 | */ |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/list_sort.h> | 13 | #include <linux/list_sort.h> |
14 | #include <linux/proc_fs.h> | ||
15 | #include <linux/seq_file.h> | ||
14 | #include "ext4.h" | 16 | #include "ext4.h" |
15 | #include "extents_status.h" | 17 | #include "extents_status.h" |
16 | 18 | ||
@@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | |||
313 | */ | 315 | */ |
314 | if (!ext4_es_is_delayed(es)) { | 316 | if (!ext4_es_is_delayed(es)) { |
315 | EXT4_I(inode)->i_es_lru_nr++; | 317 | EXT4_I(inode)->i_es_lru_nr++; |
316 | percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); | 318 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> |
319 | s_es_stats.es_stats_lru_cnt); | ||
317 | } | 320 | } |
318 | 321 | ||
322 | EXT4_I(inode)->i_es_all_nr++; | ||
323 | percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | ||
324 | |||
319 | return es; | 325 | return es; |
320 | } | 326 | } |
321 | 327 | ||
322 | static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | 328 | static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) |
323 | { | 329 | { |
330 | EXT4_I(inode)->i_es_all_nr--; | ||
331 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | ||
332 | |||
324 | /* Decrease the lru counter when this es is not delayed */ | 333 | /* Decrease the lru counter when this es is not delayed */ |
325 | if (!ext4_es_is_delayed(es)) { | 334 | if (!ext4_es_is_delayed(es)) { |
326 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | 335 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); |
327 | EXT4_I(inode)->i_es_lru_nr--; | 336 | EXT4_I(inode)->i_es_lru_nr--; |
328 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); | 337 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> |
338 | s_es_stats.es_stats_lru_cnt); | ||
329 | } | 339 | } |
330 | 340 | ||
331 | kmem_cache_free(ext4_es_cachep, es); | 341 | kmem_cache_free(ext4_es_cachep, es); |
@@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
426 | unsigned short ee_len; | 436 | unsigned short ee_len; |
427 | int depth, ee_status, es_status; | 437 | int depth, ee_status, es_status; |
428 | 438 | ||
429 | path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); | 439 | path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); |
430 | if (IS_ERR(path)) | 440 | if (IS_ERR(path)) |
431 | return; | 441 | return; |
432 | 442 | ||
@@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
499 | } | 509 | } |
500 | } | 510 | } |
501 | out: | 511 | out: |
502 | if (path) { | 512 | ext4_ext_drop_refs(path); |
503 | ext4_ext_drop_refs(path); | 513 | kfree(path); |
504 | kfree(path); | ||
505 | } | ||
506 | } | 514 | } |
507 | 515 | ||
508 | static void ext4_es_insert_extent_ind_check(struct inode *inode, | 516 | static void ext4_es_insert_extent_ind_check(struct inode *inode, |
@@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | |||
731 | struct extent_status *es) | 739 | struct extent_status *es) |
732 | { | 740 | { |
733 | struct ext4_es_tree *tree; | 741 | struct ext4_es_tree *tree; |
742 | struct ext4_es_stats *stats; | ||
734 | struct extent_status *es1 = NULL; | 743 | struct extent_status *es1 = NULL; |
735 | struct rb_node *node; | 744 | struct rb_node *node; |
736 | int found = 0; | 745 | int found = 0; |
@@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | |||
767 | } | 776 | } |
768 | 777 | ||
769 | out: | 778 | out: |
779 | stats = &EXT4_SB(inode->i_sb)->s_es_stats; | ||
770 | if (found) { | 780 | if (found) { |
771 | BUG_ON(!es1); | 781 | BUG_ON(!es1); |
772 | es->es_lblk = es1->es_lblk; | 782 | es->es_lblk = es1->es_lblk; |
773 | es->es_len = es1->es_len; | 783 | es->es_len = es1->es_len; |
774 | es->es_pblk = es1->es_pblk; | 784 | es->es_pblk = es1->es_pblk; |
785 | stats->es_stats_cache_hits++; | ||
786 | } else { | ||
787 | stats->es_stats_cache_misses++; | ||
775 | } | 788 | } |
776 | 789 | ||
777 | read_unlock(&EXT4_I(inode)->i_es_lock); | 790 | read_unlock(&EXT4_I(inode)->i_es_lock); |
@@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | |||
933 | struct ext4_inode_info *locked_ei) | 946 | struct ext4_inode_info *locked_ei) |
934 | { | 947 | { |
935 | struct ext4_inode_info *ei; | 948 | struct ext4_inode_info *ei; |
949 | struct ext4_es_stats *es_stats; | ||
936 | struct list_head *cur, *tmp; | 950 | struct list_head *cur, *tmp; |
937 | LIST_HEAD(skipped); | 951 | LIST_HEAD(skipped); |
952 | ktime_t start_time; | ||
953 | u64 scan_time; | ||
938 | int nr_shrunk = 0; | 954 | int nr_shrunk = 0; |
939 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 955 | int retried = 0, skip_precached = 1, nr_skipped = 0; |
940 | 956 | ||
957 | es_stats = &sbi->s_es_stats; | ||
958 | start_time = ktime_get(); | ||
941 | spin_lock(&sbi->s_es_lru_lock); | 959 | spin_lock(&sbi->s_es_lru_lock); |
942 | 960 | ||
943 | retry: | 961 | retry: |
@@ -948,7 +966,8 @@ retry: | |||
948 | * If we have already reclaimed all extents from extent | 966 | * If we have already reclaimed all extents from extent |
949 | * status tree, just stop the loop immediately. | 967 | * status tree, just stop the loop immediately. |
950 | */ | 968 | */ |
951 | if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | 969 | if (percpu_counter_read_positive( |
970 | &es_stats->es_stats_lru_cnt) == 0) | ||
952 | break; | 971 | break; |
953 | 972 | ||
954 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 973 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
@@ -958,7 +977,7 @@ retry: | |||
958 | * time. Normally we try hard to avoid shrinking | 977 | * time. Normally we try hard to avoid shrinking |
959 | * precached inodes, but we will as a last resort. | 978 | * precached inodes, but we will as a last resort. |
960 | */ | 979 | */ |
961 | if ((sbi->s_es_last_sorted < ei->i_touch_when) || | 980 | if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || |
962 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | 981 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, |
963 | EXT4_STATE_EXT_PRECACHED))) { | 982 | EXT4_STATE_EXT_PRECACHED))) { |
964 | nr_skipped++; | 983 | nr_skipped++; |
@@ -992,7 +1011,7 @@ retry: | |||
992 | if ((nr_shrunk == 0) && nr_skipped && !retried) { | 1011 | if ((nr_shrunk == 0) && nr_skipped && !retried) { |
993 | retried++; | 1012 | retried++; |
994 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | 1013 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); |
995 | sbi->s_es_last_sorted = jiffies; | 1014 | es_stats->es_stats_last_sorted = jiffies; |
996 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | 1015 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, |
997 | i_es_lru); | 1016 | i_es_lru); |
998 | /* | 1017 | /* |
@@ -1010,6 +1029,22 @@ retry: | |||
1010 | if (locked_ei && nr_shrunk == 0) | 1029 | if (locked_ei && nr_shrunk == 0) |
1011 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); | 1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); |
1012 | 1031 | ||
1032 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | ||
1033 | if (likely(es_stats->es_stats_scan_time)) | ||
1034 | es_stats->es_stats_scan_time = (scan_time + | ||
1035 | es_stats->es_stats_scan_time*3) / 4; | ||
1036 | else | ||
1037 | es_stats->es_stats_scan_time = scan_time; | ||
1038 | if (scan_time > es_stats->es_stats_max_scan_time) | ||
1039 | es_stats->es_stats_max_scan_time = scan_time; | ||
1040 | if (likely(es_stats->es_stats_shrunk)) | ||
1041 | es_stats->es_stats_shrunk = (nr_shrunk + | ||
1042 | es_stats->es_stats_shrunk*3) / 4; | ||
1043 | else | ||
1044 | es_stats->es_stats_shrunk = nr_shrunk; | ||
1045 | |||
1046 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, | ||
1047 | nr_skipped, retried); | ||
1013 | return nr_shrunk; | 1048 | return nr_shrunk; |
1014 | } | 1049 | } |
1015 | 1050 | ||
@@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink, | |||
1020 | struct ext4_sb_info *sbi; | 1055 | struct ext4_sb_info *sbi; |
1021 | 1056 | ||
1022 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | 1057 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); |
1023 | nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 1058 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); |
1024 | trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); | 1059 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); |
1025 | return nr; | 1060 | return nr; |
1026 | } | 1061 | } |
1027 | 1062 | ||
@@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, | |||
1033 | int nr_to_scan = sc->nr_to_scan; | 1068 | int nr_to_scan = sc->nr_to_scan; |
1034 | int ret, nr_shrunk; | 1069 | int ret, nr_shrunk; |
1035 | 1070 | ||
1036 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 1071 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); |
1037 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | 1072 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); |
1038 | 1073 | ||
1039 | if (!nr_to_scan) | 1074 | if (!nr_to_scan) |
1040 | return ret; | 1075 | return ret; |
1041 | 1076 | ||
1042 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1077 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); |
1043 | 1078 | ||
1044 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 1079 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); |
1045 | return nr_shrunk; | 1080 | return nr_shrunk; |
1046 | } | 1081 | } |
1047 | 1082 | ||
1048 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) | 1083 | static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos) |
1049 | { | 1084 | { |
1085 | return *pos ? NULL : SEQ_START_TOKEN; | ||
1086 | } | ||
1087 | |||
1088 | static void * | ||
1089 | ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1090 | { | ||
1091 | return NULL; | ||
1092 | } | ||
1093 | |||
1094 | static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | ||
1095 | { | ||
1096 | struct ext4_sb_info *sbi = seq->private; | ||
1097 | struct ext4_es_stats *es_stats = &sbi->s_es_stats; | ||
1098 | struct ext4_inode_info *ei, *max = NULL; | ||
1099 | unsigned int inode_cnt = 0; | ||
1100 | |||
1101 | if (v != SEQ_START_TOKEN) | ||
1102 | return 0; | ||
1103 | |||
1104 | /* here we just find an inode that has the max nr. of objects */ | ||
1105 | spin_lock(&sbi->s_es_lru_lock); | ||
1106 | list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { | ||
1107 | inode_cnt++; | ||
1108 | if (max && max->i_es_all_nr < ei->i_es_all_nr) | ||
1109 | max = ei; | ||
1110 | else if (!max) | ||
1111 | max = ei; | ||
1112 | } | ||
1113 | spin_unlock(&sbi->s_es_lru_lock); | ||
1114 | |||
1115 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", | ||
1116 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), | ||
1117 | percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); | ||
1118 | seq_printf(seq, " %lu/%lu cache hits/misses\n", | ||
1119 | es_stats->es_stats_cache_hits, | ||
1120 | es_stats->es_stats_cache_misses); | ||
1121 | if (es_stats->es_stats_last_sorted != 0) | ||
1122 | seq_printf(seq, " %u ms last sorted interval\n", | ||
1123 | jiffies_to_msecs(jiffies - | ||
1124 | es_stats->es_stats_last_sorted)); | ||
1125 | if (inode_cnt) | ||
1126 | seq_printf(seq, " %d inodes on lru list\n", inode_cnt); | ||
1127 | |||
1128 | seq_printf(seq, "average:\n %llu us scan time\n", | ||
1129 | div_u64(es_stats->es_stats_scan_time, 1000)); | ||
1130 | seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk); | ||
1131 | if (inode_cnt) | ||
1132 | seq_printf(seq, | ||
1133 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" | ||
1134 | " %llu us max scan time\n", | ||
1135 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, | ||
1136 | div_u64(es_stats->es_stats_max_scan_time, 1000)); | ||
1137 | |||
1138 | return 0; | ||
1139 | } | ||
1140 | |||
1141 | static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v) | ||
1142 | { | ||
1143 | } | ||
1144 | |||
1145 | static const struct seq_operations ext4_es_seq_shrinker_info_ops = { | ||
1146 | .start = ext4_es_seq_shrinker_info_start, | ||
1147 | .next = ext4_es_seq_shrinker_info_next, | ||
1148 | .stop = ext4_es_seq_shrinker_info_stop, | ||
1149 | .show = ext4_es_seq_shrinker_info_show, | ||
1150 | }; | ||
1151 | |||
1152 | static int | ||
1153 | ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file) | ||
1154 | { | ||
1155 | int ret; | ||
1156 | |||
1157 | ret = seq_open(file, &ext4_es_seq_shrinker_info_ops); | ||
1158 | if (!ret) { | ||
1159 | struct seq_file *m = file->private_data; | ||
1160 | m->private = PDE_DATA(inode); | ||
1161 | } | ||
1162 | |||
1163 | return ret; | ||
1164 | } | ||
1165 | |||
1166 | static int | ||
1167 | ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file) | ||
1168 | { | ||
1169 | return seq_release(inode, file); | ||
1170 | } | ||
1171 | |||
1172 | static const struct file_operations ext4_es_seq_shrinker_info_fops = { | ||
1173 | .owner = THIS_MODULE, | ||
1174 | .open = ext4_es_seq_shrinker_info_open, | ||
1175 | .read = seq_read, | ||
1176 | .llseek = seq_lseek, | ||
1177 | .release = ext4_es_seq_shrinker_info_release, | ||
1178 | }; | ||
1179 | |||
1180 | int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | ||
1181 | { | ||
1182 | int err; | ||
1183 | |||
1050 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1184 | INIT_LIST_HEAD(&sbi->s_es_lru); |
1051 | spin_lock_init(&sbi->s_es_lru_lock); | 1185 | spin_lock_init(&sbi->s_es_lru_lock); |
1052 | sbi->s_es_last_sorted = 0; | 1186 | sbi->s_es_stats.es_stats_last_sorted = 0; |
1187 | sbi->s_es_stats.es_stats_shrunk = 0; | ||
1188 | sbi->s_es_stats.es_stats_cache_hits = 0; | ||
1189 | sbi->s_es_stats.es_stats_cache_misses = 0; | ||
1190 | sbi->s_es_stats.es_stats_scan_time = 0; | ||
1191 | sbi->s_es_stats.es_stats_max_scan_time = 0; | ||
1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); | ||
1193 | if (err) | ||
1194 | return err; | ||
1195 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); | ||
1196 | if (err) | ||
1197 | goto err1; | ||
1198 | |||
1053 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; | 1199 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; |
1054 | sbi->s_es_shrinker.count_objects = ext4_es_count; | 1200 | sbi->s_es_shrinker.count_objects = ext4_es_count; |
1055 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 1201 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
1056 | register_shrinker(&sbi->s_es_shrinker); | 1202 | err = register_shrinker(&sbi->s_es_shrinker); |
1203 | if (err) | ||
1204 | goto err2; | ||
1205 | |||
1206 | if (sbi->s_proc) | ||
1207 | proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc, | ||
1208 | &ext4_es_seq_shrinker_info_fops, sbi); | ||
1209 | |||
1210 | return 0; | ||
1211 | |||
1212 | err2: | ||
1213 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | ||
1214 | err1: | ||
1215 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | ||
1216 | return err; | ||
1057 | } | 1217 | } |
1058 | 1218 | ||
1059 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | 1219 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
1060 | { | 1220 | { |
1221 | if (sbi->s_proc) | ||
1222 | remove_proc_entry("es_shrinker_info", sbi->s_proc); | ||
1223 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | ||
1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | ||
1061 | unregister_shrinker(&sbi->s_es_shrinker); | 1225 | unregister_shrinker(&sbi->s_es_shrinker); |
1062 | } | 1226 | } |
1063 | 1227 | ||
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f1b62a419920..efd5f970b501 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -64,6 +64,17 @@ struct ext4_es_tree { | |||
64 | struct extent_status *cache_es; /* recently accessed extent */ | 64 | struct extent_status *cache_es; /* recently accessed extent */ |
65 | }; | 65 | }; |
66 | 66 | ||
67 | struct ext4_es_stats { | ||
68 | unsigned long es_stats_last_sorted; | ||
69 | unsigned long es_stats_shrunk; | ||
70 | unsigned long es_stats_cache_hits; | ||
71 | unsigned long es_stats_cache_misses; | ||
72 | u64 es_stats_scan_time; | ||
73 | u64 es_stats_max_scan_time; | ||
74 | struct percpu_counter es_stats_all_cnt; | ||
75 | struct percpu_counter es_stats_lru_cnt; | ||
76 | }; | ||
77 | |||
67 | extern int __init ext4_init_es(void); | 78 | extern int __init ext4_init_es(void); |
68 | extern void ext4_exit_es(void); | 79 | extern void ext4_exit_es(void); |
69 | extern void ext4_es_init_tree(struct ext4_es_tree *tree); | 80 | extern void ext4_es_init_tree(struct ext4_es_tree *tree); |
@@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, | |||
138 | (pb & ~ES_MASK)); | 149 | (pb & ~ES_MASK)); |
139 | } | 150 | } |
140 | 151 | ||
141 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 152 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
142 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 153 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
143 | extern void ext4_es_lru_add(struct inode *inode); | 154 | extern void ext4_es_lru_add(struct inode *inode); |
144 | extern void ext4_es_lru_del(struct inode *inode); | 155 | extern void ext4_es_lru_del(struct inode *inode); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index aca7b24a4432..8131be8c0af3 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -137,10 +137,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
137 | iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); | 137 | iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); |
138 | } | 138 | } |
139 | 139 | ||
140 | iocb->private = &overwrite; | ||
140 | if (o_direct) { | 141 | if (o_direct) { |
141 | blk_start_plug(&plug); | 142 | blk_start_plug(&plug); |
142 | 143 | ||
143 | iocb->private = &overwrite; | ||
144 | 144 | ||
145 | /* check whether we do a DIO overwrite or not */ | 145 | /* check whether we do a DIO overwrite or not */ |
146 | if (ext4_should_dioread_nolock(inode) && !aio_mutex && | 146 | if (ext4_should_dioread_nolock(inode) && !aio_mutex && |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 5b87fc36aab8..ac644c31ca67 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -887,6 +887,10 @@ got: | |||
887 | struct buffer_head *block_bitmap_bh; | 887 | struct buffer_head *block_bitmap_bh; |
888 | 888 | ||
889 | block_bitmap_bh = ext4_read_block_bitmap(sb, group); | 889 | block_bitmap_bh = ext4_read_block_bitmap(sb, group); |
890 | if (!block_bitmap_bh) { | ||
891 | err = -EIO; | ||
892 | goto out; | ||
893 | } | ||
890 | BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); | 894 | BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); |
891 | err = ext4_journal_get_write_access(handle, block_bitmap_bh); | 895 | err = ext4_journal_get_write_access(handle, block_bitmap_bh); |
892 | if (err) { | 896 | if (err) { |
@@ -1011,8 +1015,7 @@ got: | |||
1011 | spin_unlock(&sbi->s_next_gen_lock); | 1015 | spin_unlock(&sbi->s_next_gen_lock); |
1012 | 1016 | ||
1013 | /* Precompute checksum seed for inode metadata */ | 1017 | /* Precompute checksum seed for inode metadata */ |
1014 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1018 | if (ext4_has_metadata_csum(sb)) { |
1015 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
1016 | __u32 csum; | 1019 | __u32 csum; |
1017 | __le32 inum = cpu_to_le32(inode->i_ino); | 1020 | __le32 inum = cpu_to_le32(inode->i_ino); |
1018 | __le32 gen = cpu_to_le32(inode->i_generation); | 1021 | __le32 gen = cpu_to_le32(inode->i_generation); |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index e75f840000a0..36b369697a13 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -318,34 +318,24 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | |||
318 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain | 318 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain |
319 | * as described above and return 0. | 319 | * as described above and return 0. |
320 | */ | 320 | */ |
321 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | 321 | static int ext4_alloc_branch(handle_t *handle, |
322 | ext4_lblk_t iblock, int indirect_blks, | 322 | struct ext4_allocation_request *ar, |
323 | int *blks, ext4_fsblk_t goal, | 323 | int indirect_blks, ext4_lblk_t *offsets, |
324 | ext4_lblk_t *offsets, Indirect *branch) | 324 | Indirect *branch) |
325 | { | 325 | { |
326 | struct ext4_allocation_request ar; | ||
327 | struct buffer_head * bh; | 326 | struct buffer_head * bh; |
328 | ext4_fsblk_t b, new_blocks[4]; | 327 | ext4_fsblk_t b, new_blocks[4]; |
329 | __le32 *p; | 328 | __le32 *p; |
330 | int i, j, err, len = 1; | 329 | int i, j, err, len = 1; |
331 | 330 | ||
332 | /* | ||
333 | * Set up for the direct block allocation | ||
334 | */ | ||
335 | memset(&ar, 0, sizeof(ar)); | ||
336 | ar.inode = inode; | ||
337 | ar.len = *blks; | ||
338 | ar.logical = iblock; | ||
339 | if (S_ISREG(inode->i_mode)) | ||
340 | ar.flags = EXT4_MB_HINT_DATA; | ||
341 | |||
342 | for (i = 0; i <= indirect_blks; i++) { | 331 | for (i = 0; i <= indirect_blks; i++) { |
343 | if (i == indirect_blks) { | 332 | if (i == indirect_blks) { |
344 | ar.goal = goal; | 333 | new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); |
345 | new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err); | ||
346 | } else | 334 | } else |
347 | goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode, | 335 | ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, |
348 | goal, 0, NULL, &err); | 336 | ar->inode, ar->goal, |
337 | ar->flags & EXT4_MB_DELALLOC_RESERVED, | ||
338 | NULL, &err); | ||
349 | if (err) { | 339 | if (err) { |
350 | i--; | 340 | i--; |
351 | goto failed; | 341 | goto failed; |
@@ -354,7 +344,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
354 | if (i == 0) | 344 | if (i == 0) |
355 | continue; | 345 | continue; |
356 | 346 | ||
357 | bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]); | 347 | bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); |
358 | if (unlikely(!bh)) { | 348 | if (unlikely(!bh)) { |
359 | err = -ENOMEM; | 349 | err = -ENOMEM; |
360 | goto failed; | 350 | goto failed; |
@@ -372,7 +362,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
372 | b = new_blocks[i]; | 362 | b = new_blocks[i]; |
373 | 363 | ||
374 | if (i == indirect_blks) | 364 | if (i == indirect_blks) |
375 | len = ar.len; | 365 | len = ar->len; |
376 | for (j = 0; j < len; j++) | 366 | for (j = 0; j < len; j++) |
377 | *p++ = cpu_to_le32(b++); | 367 | *p++ = cpu_to_le32(b++); |
378 | 368 | ||
@@ -381,11 +371,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
381 | unlock_buffer(bh); | 371 | unlock_buffer(bh); |
382 | 372 | ||
383 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 373 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
384 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 374 | err = ext4_handle_dirty_metadata(handle, ar->inode, bh); |
385 | if (err) | 375 | if (err) |
386 | goto failed; | 376 | goto failed; |
387 | } | 377 | } |
388 | *blks = ar.len; | ||
389 | return 0; | 378 | return 0; |
390 | failed: | 379 | failed: |
391 | for (; i >= 0; i--) { | 380 | for (; i >= 0; i--) { |
@@ -396,10 +385,10 @@ failed: | |||
396 | * existing before ext4_alloc_branch() was called. | 385 | * existing before ext4_alloc_branch() was called. |
397 | */ | 386 | */ |
398 | if (i > 0 && i != indirect_blks && branch[i].bh) | 387 | if (i > 0 && i != indirect_blks && branch[i].bh) |
399 | ext4_forget(handle, 1, inode, branch[i].bh, | 388 | ext4_forget(handle, 1, ar->inode, branch[i].bh, |
400 | branch[i].bh->b_blocknr); | 389 | branch[i].bh->b_blocknr); |
401 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], | 390 | ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], |
402 | (i == indirect_blks) ? ar.len : 1, 0); | 391 | (i == indirect_blks) ? ar->len : 1, 0); |
403 | } | 392 | } |
404 | return err; | 393 | return err; |
405 | } | 394 | } |
@@ -419,9 +408,9 @@ failed: | |||
419 | * inode (->i_blocks, etc.). In case of success we end up with the full | 408 | * inode (->i_blocks, etc.). In case of success we end up with the full |
420 | * chain to new block and return 0. | 409 | * chain to new block and return 0. |
421 | */ | 410 | */ |
422 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | 411 | static int ext4_splice_branch(handle_t *handle, |
423 | ext4_lblk_t block, Indirect *where, int num, | 412 | struct ext4_allocation_request *ar, |
424 | int blks) | 413 | Indirect *where, int num) |
425 | { | 414 | { |
426 | int i; | 415 | int i; |
427 | int err = 0; | 416 | int err = 0; |
@@ -446,9 +435,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
446 | * Update the host buffer_head or inode to point to more just allocated | 435 | * Update the host buffer_head or inode to point to more just allocated |
447 | * direct blocks blocks | 436 | * direct blocks blocks |
448 | */ | 437 | */ |
449 | if (num == 0 && blks > 1) { | 438 | if (num == 0 && ar->len > 1) { |
450 | current_block = le32_to_cpu(where->key) + 1; | 439 | current_block = le32_to_cpu(where->key) + 1; |
451 | for (i = 1; i < blks; i++) | 440 | for (i = 1; i < ar->len; i++) |
452 | *(where->p + i) = cpu_to_le32(current_block++); | 441 | *(where->p + i) = cpu_to_le32(current_block++); |
453 | } | 442 | } |
454 | 443 | ||
@@ -465,14 +454,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
465 | */ | 454 | */ |
466 | jbd_debug(5, "splicing indirect only\n"); | 455 | jbd_debug(5, "splicing indirect only\n"); |
467 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); | 456 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); |
468 | err = ext4_handle_dirty_metadata(handle, inode, where->bh); | 457 | err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); |
469 | if (err) | 458 | if (err) |
470 | goto err_out; | 459 | goto err_out; |
471 | } else { | 460 | } else { |
472 | /* | 461 | /* |
473 | * OK, we spliced it into the inode itself on a direct block. | 462 | * OK, we spliced it into the inode itself on a direct block. |
474 | */ | 463 | */ |
475 | ext4_mark_inode_dirty(handle, inode); | 464 | ext4_mark_inode_dirty(handle, ar->inode); |
476 | jbd_debug(5, "splicing direct\n"); | 465 | jbd_debug(5, "splicing direct\n"); |
477 | } | 466 | } |
478 | return err; | 467 | return err; |
@@ -484,11 +473,11 @@ err_out: | |||
484 | * need to revoke the block, which is why we don't | 473 | * need to revoke the block, which is why we don't |
485 | * need to set EXT4_FREE_BLOCKS_METADATA. | 474 | * need to set EXT4_FREE_BLOCKS_METADATA. |
486 | */ | 475 | */ |
487 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | 476 | ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, |
488 | EXT4_FREE_BLOCKS_FORGET); | 477 | EXT4_FREE_BLOCKS_FORGET); |
489 | } | 478 | } |
490 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), | 479 | ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), |
491 | blks, 0); | 480 | ar->len, 0); |
492 | 481 | ||
493 | return err; | 482 | return err; |
494 | } | 483 | } |
@@ -525,11 +514,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
525 | struct ext4_map_blocks *map, | 514 | struct ext4_map_blocks *map, |
526 | int flags) | 515 | int flags) |
527 | { | 516 | { |
517 | struct ext4_allocation_request ar; | ||
528 | int err = -EIO; | 518 | int err = -EIO; |
529 | ext4_lblk_t offsets[4]; | 519 | ext4_lblk_t offsets[4]; |
530 | Indirect chain[4]; | 520 | Indirect chain[4]; |
531 | Indirect *partial; | 521 | Indirect *partial; |
532 | ext4_fsblk_t goal; | ||
533 | int indirect_blks; | 522 | int indirect_blks; |
534 | int blocks_to_boundary = 0; | 523 | int blocks_to_boundary = 0; |
535 | int depth; | 524 | int depth; |
@@ -579,7 +568,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
579 | return -ENOSPC; | 568 | return -ENOSPC; |
580 | } | 569 | } |
581 | 570 | ||
582 | goal = ext4_find_goal(inode, map->m_lblk, partial); | 571 | /* Set up for the direct block allocation */ |
572 | memset(&ar, 0, sizeof(ar)); | ||
573 | ar.inode = inode; | ||
574 | ar.logical = map->m_lblk; | ||
575 | if (S_ISREG(inode->i_mode)) | ||
576 | ar.flags = EXT4_MB_HINT_DATA; | ||
577 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
578 | ar.flags |= EXT4_MB_DELALLOC_RESERVED; | ||
579 | |||
580 | ar.goal = ext4_find_goal(inode, map->m_lblk, partial); | ||
583 | 581 | ||
584 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 582 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
585 | indirect_blks = (chain + depth) - partial - 1; | 583 | indirect_blks = (chain + depth) - partial - 1; |
@@ -588,13 +586,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
588 | * Next look up the indirect map to count the totoal number of | 586 | * Next look up the indirect map to count the totoal number of |
589 | * direct blocks to allocate for this branch. | 587 | * direct blocks to allocate for this branch. |
590 | */ | 588 | */ |
591 | count = ext4_blks_to_allocate(partial, indirect_blks, | 589 | ar.len = ext4_blks_to_allocate(partial, indirect_blks, |
592 | map->m_len, blocks_to_boundary); | 590 | map->m_len, blocks_to_boundary); |
591 | |||
593 | /* | 592 | /* |
594 | * Block out ext4_truncate while we alter the tree | 593 | * Block out ext4_truncate while we alter the tree |
595 | */ | 594 | */ |
596 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, | 595 | err = ext4_alloc_branch(handle, &ar, indirect_blks, |
597 | &count, goal, | ||
598 | offsets + (partial - chain), partial); | 596 | offsets + (partial - chain), partial); |
599 | 597 | ||
600 | /* | 598 | /* |
@@ -605,14 +603,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
605 | * may need to return -EAGAIN upwards in the worst case. --sct | 603 | * may need to return -EAGAIN upwards in the worst case. --sct |
606 | */ | 604 | */ |
607 | if (!err) | 605 | if (!err) |
608 | err = ext4_splice_branch(handle, inode, map->m_lblk, | 606 | err = ext4_splice_branch(handle, &ar, partial, indirect_blks); |
609 | partial, indirect_blks, count); | ||
610 | if (err) | 607 | if (err) |
611 | goto cleanup; | 608 | goto cleanup; |
612 | 609 | ||
613 | map->m_flags |= EXT4_MAP_NEW; | 610 | map->m_flags |= EXT4_MAP_NEW; |
614 | 611 | ||
615 | ext4_update_inode_fsync_trans(handle, inode, 1); | 612 | ext4_update_inode_fsync_trans(handle, inode, 1); |
613 | count = ar.len; | ||
616 | got_it: | 614 | got_it: |
617 | map->m_flags |= EXT4_MAP_MAPPED; | 615 | map->m_flags |= EXT4_MAP_MAPPED; |
618 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | 616 | map->m_pblk = le32_to_cpu(chain[depth-1].key); |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index bea662bd0ca6..3ea62695abce 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -594,6 +594,7 @@ retry: | |||
594 | if (ret) { | 594 | if (ret) { |
595 | unlock_page(page); | 595 | unlock_page(page); |
596 | page_cache_release(page); | 596 | page_cache_release(page); |
597 | page = NULL; | ||
597 | ext4_orphan_add(handle, inode); | 598 | ext4_orphan_add(handle, inode); |
598 | up_write(&EXT4_I(inode)->xattr_sem); | 599 | up_write(&EXT4_I(inode)->xattr_sem); |
599 | sem_held = 0; | 600 | sem_held = 0; |
@@ -613,7 +614,8 @@ retry: | |||
613 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 614 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
614 | goto retry; | 615 | goto retry; |
615 | 616 | ||
616 | block_commit_write(page, from, to); | 617 | if (page) |
618 | block_commit_write(page, from, to); | ||
617 | out: | 619 | out: |
618 | if (page) { | 620 | if (page) { |
619 | unlock_page(page); | 621 | unlock_page(page); |
@@ -1126,8 +1128,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, | |||
1126 | memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, | 1128 | memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, |
1127 | inline_size - EXT4_INLINE_DOTDOT_SIZE); | 1129 | inline_size - EXT4_INLINE_DOTDOT_SIZE); |
1128 | 1130 | ||
1129 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1131 | if (ext4_has_metadata_csum(inode->i_sb)) |
1130 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1131 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1132 | csum_size = sizeof(struct ext4_dir_entry_tail); |
1132 | 1133 | ||
1133 | inode->i_size = inode->i_sb->s_blocksize; | 1134 | inode->i_size = inode->i_sb->s_blocksize; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3aa26e9117c4..3356ab5395f4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, | |||
83 | 83 | ||
84 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 84 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
85 | cpu_to_le32(EXT4_OS_LINUX) || | 85 | cpu_to_le32(EXT4_OS_LINUX) || |
86 | !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 86 | !ext4_has_metadata_csum(inode->i_sb)) |
87 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
88 | return 1; | 87 | return 1; |
89 | 88 | ||
90 | provided = le16_to_cpu(raw->i_checksum_lo); | 89 | provided = le16_to_cpu(raw->i_checksum_lo); |
@@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, | |||
105 | 104 | ||
106 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 105 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
107 | cpu_to_le32(EXT4_OS_LINUX) || | 106 | cpu_to_le32(EXT4_OS_LINUX) || |
108 | !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 107 | !ext4_has_metadata_csum(inode->i_sb)) |
109 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
110 | return; | 108 | return; |
111 | 109 | ||
112 | csum = ext4_inode_csum(inode, raw, ei); | 110 | csum = ext4_inode_csum(inode, raw, ei); |
@@ -224,16 +222,15 @@ void ext4_evict_inode(struct inode *inode) | |||
224 | goto no_delete; | 222 | goto no_delete; |
225 | } | 223 | } |
226 | 224 | ||
227 | if (!is_bad_inode(inode)) | 225 | if (is_bad_inode(inode)) |
228 | dquot_initialize(inode); | 226 | goto no_delete; |
227 | dquot_initialize(inode); | ||
229 | 228 | ||
230 | if (ext4_should_order_data(inode)) | 229 | if (ext4_should_order_data(inode)) |
231 | ext4_begin_ordered_truncate(inode, 0); | 230 | ext4_begin_ordered_truncate(inode, 0); |
232 | truncate_inode_pages_final(&inode->i_data); | 231 | truncate_inode_pages_final(&inode->i_data); |
233 | 232 | ||
234 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 233 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
235 | if (is_bad_inode(inode)) | ||
236 | goto no_delete; | ||
237 | 234 | ||
238 | /* | 235 | /* |
239 | * Protect us against freezing - iput() caller didn't have to have any | 236 | * Protect us against freezing - iput() caller didn't have to have any |
@@ -590,20 +587,12 @@ found: | |||
590 | /* | 587 | /* |
591 | * New blocks allocate and/or writing to unwritten extent | 588 | * New blocks allocate and/or writing to unwritten extent |
592 | * will possibly result in updating i_data, so we take | 589 | * will possibly result in updating i_data, so we take |
593 | * the write lock of i_data_sem, and call get_blocks() | 590 | * the write lock of i_data_sem, and call get_block() |
594 | * with create == 1 flag. | 591 | * with create == 1 flag. |
595 | */ | 592 | */ |
596 | down_write(&EXT4_I(inode)->i_data_sem); | 593 | down_write(&EXT4_I(inode)->i_data_sem); |
597 | 594 | ||
598 | /* | 595 | /* |
599 | * if the caller is from delayed allocation writeout path | ||
600 | * we have already reserved fs blocks for allocation | ||
601 | * let the underlying get_block() function know to | ||
602 | * avoid double accounting | ||
603 | */ | ||
604 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
605 | ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | ||
606 | /* | ||
607 | * We need to check for EXT4 here because migrate | 596 | * We need to check for EXT4 here because migrate |
608 | * could have changed the inode type in between | 597 | * could have changed the inode type in between |
609 | */ | 598 | */ |
@@ -631,8 +620,6 @@ found: | |||
631 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 620 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
632 | ext4_da_update_reserve_space(inode, retval, 1); | 621 | ext4_da_update_reserve_space(inode, retval, 1); |
633 | } | 622 | } |
634 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
635 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | ||
636 | 623 | ||
637 | if (retval > 0) { | 624 | if (retval > 0) { |
638 | unsigned int status; | 625 | unsigned int status; |
@@ -734,11 +721,11 @@ int ext4_get_block(struct inode *inode, sector_t iblock, | |||
734 | * `handle' can be NULL if create is zero | 721 | * `handle' can be NULL if create is zero |
735 | */ | 722 | */ |
736 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | 723 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, |
737 | ext4_lblk_t block, int create, int *errp) | 724 | ext4_lblk_t block, int create) |
738 | { | 725 | { |
739 | struct ext4_map_blocks map; | 726 | struct ext4_map_blocks map; |
740 | struct buffer_head *bh; | 727 | struct buffer_head *bh; |
741 | int fatal = 0, err; | 728 | int err; |
742 | 729 | ||
743 | J_ASSERT(handle != NULL || create == 0); | 730 | J_ASSERT(handle != NULL || create == 0); |
744 | 731 | ||
@@ -747,21 +734,14 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
747 | err = ext4_map_blocks(handle, inode, &map, | 734 | err = ext4_map_blocks(handle, inode, &map, |
748 | create ? EXT4_GET_BLOCKS_CREATE : 0); | 735 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
749 | 736 | ||
750 | /* ensure we send some value back into *errp */ | 737 | if (err == 0) |
751 | *errp = 0; | 738 | return create ? ERR_PTR(-ENOSPC) : NULL; |
752 | |||
753 | if (create && err == 0) | ||
754 | err = -ENOSPC; /* should never happen */ | ||
755 | if (err < 0) | 739 | if (err < 0) |
756 | *errp = err; | 740 | return ERR_PTR(err); |
757 | if (err <= 0) | ||
758 | return NULL; | ||
759 | 741 | ||
760 | bh = sb_getblk(inode->i_sb, map.m_pblk); | 742 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
761 | if (unlikely(!bh)) { | 743 | if (unlikely(!bh)) |
762 | *errp = -ENOMEM; | 744 | return ERR_PTR(-ENOMEM); |
763 | return NULL; | ||
764 | } | ||
765 | if (map.m_flags & EXT4_MAP_NEW) { | 745 | if (map.m_flags & EXT4_MAP_NEW) { |
766 | J_ASSERT(create != 0); | 746 | J_ASSERT(create != 0); |
767 | J_ASSERT(handle != NULL); | 747 | J_ASSERT(handle != NULL); |
@@ -775,44 +755,44 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
775 | */ | 755 | */ |
776 | lock_buffer(bh); | 756 | lock_buffer(bh); |
777 | BUFFER_TRACE(bh, "call get_create_access"); | 757 | BUFFER_TRACE(bh, "call get_create_access"); |
778 | fatal = ext4_journal_get_create_access(handle, bh); | 758 | err = ext4_journal_get_create_access(handle, bh); |
779 | if (!fatal && !buffer_uptodate(bh)) { | 759 | if (unlikely(err)) { |
760 | unlock_buffer(bh); | ||
761 | goto errout; | ||
762 | } | ||
763 | if (!buffer_uptodate(bh)) { | ||
780 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); | 764 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
781 | set_buffer_uptodate(bh); | 765 | set_buffer_uptodate(bh); |
782 | } | 766 | } |
783 | unlock_buffer(bh); | 767 | unlock_buffer(bh); |
784 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 768 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
785 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 769 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
786 | if (!fatal) | 770 | if (unlikely(err)) |
787 | fatal = err; | 771 | goto errout; |
788 | } else { | 772 | } else |
789 | BUFFER_TRACE(bh, "not a new buffer"); | 773 | BUFFER_TRACE(bh, "not a new buffer"); |
790 | } | ||
791 | if (fatal) { | ||
792 | *errp = fatal; | ||
793 | brelse(bh); | ||
794 | bh = NULL; | ||
795 | } | ||
796 | return bh; | 774 | return bh; |
775 | errout: | ||
776 | brelse(bh); | ||
777 | return ERR_PTR(err); | ||
797 | } | 778 | } |
798 | 779 | ||
799 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 780 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
800 | ext4_lblk_t block, int create, int *err) | 781 | ext4_lblk_t block, int create) |
801 | { | 782 | { |
802 | struct buffer_head *bh; | 783 | struct buffer_head *bh; |
803 | 784 | ||
804 | bh = ext4_getblk(handle, inode, block, create, err); | 785 | bh = ext4_getblk(handle, inode, block, create); |
805 | if (!bh) | 786 | if (IS_ERR(bh)) |
806 | return bh; | 787 | return bh; |
807 | if (buffer_uptodate(bh)) | 788 | if (!bh || buffer_uptodate(bh)) |
808 | return bh; | 789 | return bh; |
809 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); | 790 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
810 | wait_on_buffer(bh); | 791 | wait_on_buffer(bh); |
811 | if (buffer_uptodate(bh)) | 792 | if (buffer_uptodate(bh)) |
812 | return bh; | 793 | return bh; |
813 | put_bh(bh); | 794 | put_bh(bh); |
814 | *err = -EIO; | 795 | return ERR_PTR(-EIO); |
815 | return NULL; | ||
816 | } | 796 | } |
817 | 797 | ||
818 | int ext4_walk_page_buffers(handle_t *handle, | 798 | int ext4_walk_page_buffers(handle_t *handle, |
@@ -1536,7 +1516,7 @@ out_unlock: | |||
1536 | } | 1516 | } |
1537 | 1517 | ||
1538 | /* | 1518 | /* |
1539 | * This is a special get_blocks_t callback which is used by | 1519 | * This is a special get_block_t callback which is used by |
1540 | * ext4_da_write_begin(). It will either return mapped block or | 1520 | * ext4_da_write_begin(). It will either return mapped block or |
1541 | * reserve space for a single block. | 1521 | * reserve space for a single block. |
1542 | * | 1522 | * |
@@ -2011,12 +1991,10 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | |||
2011 | * in data loss. So use reserved blocks to allocate metadata if | 1991 | * in data loss. So use reserved blocks to allocate metadata if |
2012 | * possible. | 1992 | * possible. |
2013 | * | 1993 | * |
2014 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks | 1994 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if |
2015 | * in question are delalloc blocks. This affects functions in many | 1995 | * the blocks in question are delalloc blocks. This indicates |
2016 | * different parts of the allocation call path. This flag exists | 1996 | * that the blocks and quotas has already been checked when |
2017 | * primarily because we don't want to change *many* call functions, so | 1997 | * the data was copied into the page cache. |
2018 | * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag | ||
2019 | * once the inode's allocation semaphore is taken. | ||
2020 | */ | 1998 | */ |
2021 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | 1999 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
2022 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | 2000 | EXT4_GET_BLOCKS_METADATA_NOFAIL; |
@@ -2515,6 +2493,20 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2515 | return 0; | 2493 | return 0; |
2516 | } | 2494 | } |
2517 | 2495 | ||
2496 | /* We always reserve for an inode update; the superblock could be there too */ | ||
2497 | static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) | ||
2498 | { | ||
2499 | if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
2500 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) | ||
2501 | return 1; | ||
2502 | |||
2503 | if (pos + len <= 0x7fffffffULL) | ||
2504 | return 1; | ||
2505 | |||
2506 | /* We might need to update the superblock to set LARGE_FILE */ | ||
2507 | return 2; | ||
2508 | } | ||
2509 | |||
2518 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2510 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2519 | loff_t pos, unsigned len, unsigned flags, | 2511 | loff_t pos, unsigned len, unsigned flags, |
2520 | struct page **pagep, void **fsdata) | 2512 | struct page **pagep, void **fsdata) |
@@ -2565,7 +2557,8 @@ retry_grab: | |||
2565 | * of file which has an already mapped buffer. | 2557 | * of file which has an already mapped buffer. |
2566 | */ | 2558 | */ |
2567 | retry_journal: | 2559 | retry_journal: |
2568 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1); | 2560 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, |
2561 | ext4_da_write_credits(inode, pos, len)); | ||
2569 | if (IS_ERR(handle)) { | 2562 | if (IS_ERR(handle)) { |
2570 | page_cache_release(page); | 2563 | page_cache_release(page); |
2571 | return PTR_ERR(handle); | 2564 | return PTR_ERR(handle); |
@@ -2658,10 +2651,7 @@ static int ext4_da_write_end(struct file *file, | |||
2658 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { | 2651 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { |
2659 | if (ext4_has_inline_data(inode) || | 2652 | if (ext4_has_inline_data(inode) || |
2660 | ext4_da_should_update_i_disksize(page, end)) { | 2653 | ext4_da_should_update_i_disksize(page, end)) { |
2661 | down_write(&EXT4_I(inode)->i_data_sem); | 2654 | ext4_update_i_disksize(inode, new_i_size); |
2662 | if (new_i_size > EXT4_I(inode)->i_disksize) | ||
2663 | EXT4_I(inode)->i_disksize = new_i_size; | ||
2664 | up_write(&EXT4_I(inode)->i_data_sem); | ||
2665 | /* We need to mark inode dirty even if | 2655 | /* We need to mark inode dirty even if |
2666 | * new_i_size is less that inode->i_size | 2656 | * new_i_size is less that inode->i_size |
2667 | * bu greater than i_disksize.(hint delalloc) | 2657 | * bu greater than i_disksize.(hint delalloc) |
@@ -3936,8 +3926,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3936 | ei->i_extra_isize = 0; | 3926 | ei->i_extra_isize = 0; |
3937 | 3927 | ||
3938 | /* Precompute checksum seed for inode metadata */ | 3928 | /* Precompute checksum seed for inode metadata */ |
3939 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3929 | if (ext4_has_metadata_csum(sb)) { |
3940 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
3941 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 3930 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
3942 | __u32 csum; | 3931 | __u32 csum; |
3943 | __le32 inum = cpu_to_le32(inode->i_ino); | 3932 | __le32 inum = cpu_to_le32(inode->i_ino); |
@@ -4127,6 +4116,13 @@ bad_inode: | |||
4127 | return ERR_PTR(ret); | 4116 | return ERR_PTR(ret); |
4128 | } | 4117 | } |
4129 | 4118 | ||
4119 | struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) | ||
4120 | { | ||
4121 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) | ||
4122 | return ERR_PTR(-EIO); | ||
4123 | return ext4_iget(sb, ino); | ||
4124 | } | ||
4125 | |||
4130 | static int ext4_inode_blocks_set(handle_t *handle, | 4126 | static int ext4_inode_blocks_set(handle_t *handle, |
4131 | struct ext4_inode *raw_inode, | 4127 | struct ext4_inode *raw_inode, |
4132 | struct ext4_inode_info *ei) | 4128 | struct ext4_inode_info *ei) |
@@ -4226,7 +4222,8 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4226 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); | 4222 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); |
4227 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); | 4223 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); |
4228 | 4224 | ||
4229 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) { | 4225 | err = ext4_inode_blocks_set(handle, raw_inode, ei); |
4226 | if (err) { | ||
4230 | spin_unlock(&ei->i_raw_lock); | 4227 | spin_unlock(&ei->i_raw_lock); |
4231 | goto out_brelse; | 4228 | goto out_brelse; |
4232 | } | 4229 | } |
@@ -4536,8 +4533,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4536 | ext4_orphan_del(NULL, inode); | 4533 | ext4_orphan_del(NULL, inode); |
4537 | goto err_out; | 4534 | goto err_out; |
4538 | } | 4535 | } |
4539 | } else | 4536 | } else { |
4537 | loff_t oldsize = inode->i_size; | ||
4538 | |||
4540 | i_size_write(inode, attr->ia_size); | 4539 | i_size_write(inode, attr->ia_size); |
4540 | pagecache_isize_extended(inode, oldsize, inode->i_size); | ||
4541 | } | ||
4541 | 4542 | ||
4542 | /* | 4543 | /* |
4543 | * Blocks are going to be removed from the inode. Wait | 4544 | * Blocks are going to be removed from the inode. Wait |
@@ -4958,7 +4959,12 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4958 | if (val) | 4959 | if (val) |
4959 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4960 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
4960 | else { | 4961 | else { |
4961 | jbd2_journal_flush(journal); | 4962 | err = jbd2_journal_flush(journal); |
4963 | if (err < 0) { | ||
4964 | jbd2_journal_unlock_updates(journal); | ||
4965 | ext4_inode_resume_unlocked_dio(inode); | ||
4966 | return err; | ||
4967 | } | ||
4962 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4968 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
4963 | } | 4969 | } |
4964 | ext4_set_aops(inode); | 4970 | ext4_set_aops(inode); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 0f2252ec274d..bfda18a15592 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -331,8 +331,7 @@ flags_out: | |||
331 | if (!inode_owner_or_capable(inode)) | 331 | if (!inode_owner_or_capable(inode)) |
332 | return -EPERM; | 332 | return -EPERM; |
333 | 333 | ||
334 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 334 | if (ext4_has_metadata_csum(inode->i_sb)) { |
335 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
336 | ext4_warning(sb, "Setting inode version is not " | 335 | ext4_warning(sb, "Setting inode version is not " |
337 | "supported with metadata_csum enabled."); | 336 | "supported with metadata_csum enabled."); |
338 | return -ENOTTY; | 337 | return -ENOTTY; |
@@ -532,9 +531,17 @@ group_add_out: | |||
532 | } | 531 | } |
533 | 532 | ||
534 | case EXT4_IOC_SWAP_BOOT: | 533 | case EXT4_IOC_SWAP_BOOT: |
534 | { | ||
535 | int err; | ||
535 | if (!(filp->f_mode & FMODE_WRITE)) | 536 | if (!(filp->f_mode & FMODE_WRITE)) |
536 | return -EBADF; | 537 | return -EBADF; |
537 | return swap_inode_boot_loader(sb, inode); | 538 | err = mnt_want_write_file(filp); |
539 | if (err) | ||
540 | return err; | ||
541 | err = swap_inode_boot_loader(sb, inode); | ||
542 | mnt_drop_write_file(filp); | ||
543 | return err; | ||
544 | } | ||
538 | 545 | ||
539 | case EXT4_IOC_RESIZE_FS: { | 546 | case EXT4_IOC_RESIZE_FS: { |
540 | ext4_fsblk_t n_blocks_count; | 547 | ext4_fsblk_t n_blocks_count; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 748c9136a60a..dbfe15c2533c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3155,9 +3155,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3155 | "start %lu, size %lu, fe_logical %lu", | 3155 | "start %lu, size %lu, fe_logical %lu", |
3156 | (unsigned long) start, (unsigned long) size, | 3156 | (unsigned long) start, (unsigned long) size, |
3157 | (unsigned long) ac->ac_o_ex.fe_logical); | 3157 | (unsigned long) ac->ac_o_ex.fe_logical); |
3158 | BUG(); | ||
3158 | } | 3159 | } |
3159 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && | ||
3160 | start > ac->ac_o_ex.fe_logical); | ||
3161 | BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 3160 | BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); |
3162 | 3161 | ||
3163 | /* now prepare goal request */ | 3162 | /* now prepare goal request */ |
@@ -4410,14 +4409,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4410 | if (IS_NOQUOTA(ar->inode)) | 4409 | if (IS_NOQUOTA(ar->inode)) |
4411 | ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; | 4410 | ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; |
4412 | 4411 | ||
4413 | /* | 4412 | if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { |
4414 | * For delayed allocation, we could skip the ENOSPC and | ||
4415 | * EDQUOT check, as blocks and quotas have been already | ||
4416 | * reserved when data being copied into pagecache. | ||
4417 | */ | ||
4418 | if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) | ||
4419 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | ||
4420 | else { | ||
4421 | /* Without delayed allocation we need to verify | 4413 | /* Without delayed allocation we need to verify |
4422 | * there is enough free blocks to do block allocation | 4414 | * there is enough free blocks to do block allocation |
4423 | * and verify allocation doesn't exceed the quota limits. | 4415 | * and verify allocation doesn't exceed the quota limits. |
@@ -4528,8 +4520,7 @@ out: | |||
4528 | if (inquota && ar->len < inquota) | 4520 | if (inquota && ar->len < inquota) |
4529 | dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); | 4521 | dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); |
4530 | if (!ar->len) { | 4522 | if (!ar->len) { |
4531 | if (!ext4_test_inode_state(ar->inode, | 4523 | if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) |
4532 | EXT4_STATE_DELALLOC_RESERVED)) | ||
4533 | /* release all the reserved blocks if non delalloc */ | 4524 | /* release all the reserved blocks if non delalloc */ |
4534 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, | 4525 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
4535 | reserv_clstrs); | 4526 | reserv_clstrs); |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index d3567f27bae7..a432634f2e6a 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -41,8 +41,7 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
41 | ext4_ext_store_pblock(&newext, lb->first_pblock); | 41 | ext4_ext_store_pblock(&newext, lb->first_pblock); |
42 | /* Locking only for convinience since we are operating on temp inode */ | 42 | /* Locking only for convinience since we are operating on temp inode */ |
43 | down_write(&EXT4_I(inode)->i_data_sem); | 43 | down_write(&EXT4_I(inode)->i_data_sem); |
44 | path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); | 44 | path = ext4_find_extent(inode, lb->first_block, NULL, 0); |
45 | |||
46 | if (IS_ERR(path)) { | 45 | if (IS_ERR(path)) { |
47 | retval = PTR_ERR(path); | 46 | retval = PTR_ERR(path); |
48 | path = NULL; | 47 | path = NULL; |
@@ -81,13 +80,11 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
81 | goto err_out; | 80 | goto err_out; |
82 | } | 81 | } |
83 | } | 82 | } |
84 | retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); | 83 | retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); |
85 | err_out: | 84 | err_out: |
86 | up_write((&EXT4_I(inode)->i_data_sem)); | 85 | up_write((&EXT4_I(inode)->i_data_sem)); |
87 | if (path) { | 86 | ext4_ext_drop_refs(path); |
88 | ext4_ext_drop_refs(path); | 87 | kfree(path); |
89 | kfree(path); | ||
90 | } | ||
91 | lb->first_pblock = 0; | 88 | lb->first_pblock = 0; |
92 | return retval; | 89 | return retval; |
93 | } | 90 | } |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 32bce844c2e1..8313ca3324ec 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
@@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) | |||
20 | 20 | ||
21 | static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) | 21 | static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) |
22 | { | 22 | { |
23 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 23 | if (!ext4_has_metadata_csum(sb)) |
24 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
25 | return 1; | 24 | return 1; |
26 | 25 | ||
27 | return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); | 26 | return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); |
@@ -29,8 +28,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) | |||
29 | 28 | ||
30 | static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) | 29 | static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) |
31 | { | 30 | { |
32 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 31 | if (!ext4_has_metadata_csum(sb)) |
33 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
34 | return; | 32 | return; |
35 | 33 | ||
36 | mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); | 34 | mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 671a74b14fd7..9f2311bc9c4f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -27,120 +27,26 @@ | |||
27 | * @lblock: logical block number to find an extent path | 27 | * @lblock: logical block number to find an extent path |
28 | * @path: pointer to an extent path pointer (for output) | 28 | * @path: pointer to an extent path pointer (for output) |
29 | * | 29 | * |
30 | * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value | 30 | * ext4_find_extent wrapper. Return 0 on success, or a negative error value |
31 | * on failure. | 31 | * on failure. |
32 | */ | 32 | */ |
33 | static inline int | 33 | static inline int |
34 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, | 34 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, |
35 | struct ext4_ext_path **orig_path) | 35 | struct ext4_ext_path **ppath) |
36 | { | 36 | { |
37 | int ret = 0; | ||
38 | struct ext4_ext_path *path; | 37 | struct ext4_ext_path *path; |
39 | 38 | ||
40 | path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE); | 39 | path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE); |
41 | if (IS_ERR(path)) | 40 | if (IS_ERR(path)) |
42 | ret = PTR_ERR(path); | 41 | return PTR_ERR(path); |
43 | else if (path[ext_depth(inode)].p_ext == NULL) | 42 | if (path[ext_depth(inode)].p_ext == NULL) { |
44 | ret = -ENODATA; | 43 | ext4_ext_drop_refs(path); |
45 | else | 44 | kfree(path); |
46 | *orig_path = path; | 45 | *ppath = NULL; |
47 | 46 | return -ENODATA; | |
48 | return ret; | ||
49 | } | ||
50 | |||
51 | /** | ||
52 | * copy_extent_status - Copy the extent's initialization status | ||
53 | * | ||
54 | * @src: an extent for getting initialize status | ||
55 | * @dest: an extent to be set the status | ||
56 | */ | ||
57 | static void | ||
58 | copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) | ||
59 | { | ||
60 | if (ext4_ext_is_unwritten(src)) | ||
61 | ext4_ext_mark_unwritten(dest); | ||
62 | else | ||
63 | dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * mext_next_extent - Search for the next extent and set it to "extent" | ||
68 | * | ||
69 | * @inode: inode which is searched | ||
70 | * @path: this will obtain data for the next extent | ||
71 | * @extent: pointer to the next extent we have just gotten | ||
72 | * | ||
73 | * Search the next extent in the array of ext4_ext_path structure (@path) | ||
74 | * and set it to ext4_extent structure (@extent). In addition, the member of | ||
75 | * @path (->p_ext) also points the next extent. Return 0 on success, 1 if | ||
76 | * ext4_ext_path structure refers to the last extent, or a negative error | ||
77 | * value on failure. | ||
78 | */ | ||
79 | int | ||
80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
81 | struct ext4_extent **extent) | ||
82 | { | ||
83 | struct ext4_extent_header *eh; | ||
84 | int ppos, leaf_ppos = path->p_depth; | ||
85 | |||
86 | ppos = leaf_ppos; | ||
87 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | ||
88 | /* leaf block */ | ||
89 | *extent = ++path[ppos].p_ext; | ||
90 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | while (--ppos >= 0) { | ||
95 | if (EXT_LAST_INDEX(path[ppos].p_hdr) > | ||
96 | path[ppos].p_idx) { | ||
97 | int cur_ppos = ppos; | ||
98 | |||
99 | /* index block */ | ||
100 | path[ppos].p_idx++; | ||
101 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); | ||
102 | if (path[ppos+1].p_bh) | ||
103 | brelse(path[ppos+1].p_bh); | ||
104 | path[ppos+1].p_bh = | ||
105 | sb_bread(inode->i_sb, path[ppos].p_block); | ||
106 | if (!path[ppos+1].p_bh) | ||
107 | return -EIO; | ||
108 | path[ppos+1].p_hdr = | ||
109 | ext_block_hdr(path[ppos+1].p_bh); | ||
110 | |||
111 | /* Halfway index block */ | ||
112 | while (++cur_ppos < leaf_ppos) { | ||
113 | path[cur_ppos].p_idx = | ||
114 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); | ||
115 | path[cur_ppos].p_block = | ||
116 | ext4_idx_pblock(path[cur_ppos].p_idx); | ||
117 | if (path[cur_ppos+1].p_bh) | ||
118 | brelse(path[cur_ppos+1].p_bh); | ||
119 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, | ||
120 | path[cur_ppos].p_block); | ||
121 | if (!path[cur_ppos+1].p_bh) | ||
122 | return -EIO; | ||
123 | path[cur_ppos+1].p_hdr = | ||
124 | ext_block_hdr(path[cur_ppos+1].p_bh); | ||
125 | } | ||
126 | |||
127 | path[leaf_ppos].p_ext = *extent = NULL; | ||
128 | |||
129 | eh = path[leaf_ppos].p_hdr; | ||
130 | if (le16_to_cpu(eh->eh_entries) == 0) | ||
131 | /* empty leaf is found */ | ||
132 | return -ENODATA; | ||
133 | |||
134 | /* leaf block */ | ||
135 | path[leaf_ppos].p_ext = *extent = | ||
136 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | ||
137 | path[leaf_ppos].p_block = | ||
138 | ext4_ext_pblock(path[leaf_ppos].p_ext); | ||
139 | return 0; | ||
140 | } | ||
141 | } | 47 | } |
142 | /* We found the last extent */ | 48 | *ppath = path; |
143 | return 1; | 49 | return 0; |
144 | } | 50 | } |
145 | 51 | ||
146 | /** | 52 | /** |
@@ -178,417 +84,6 @@ ext4_double_up_write_data_sem(struct inode *orig_inode, | |||
178 | } | 84 | } |
179 | 85 | ||
180 | /** | 86 | /** |
181 | * mext_insert_across_blocks - Insert extents across leaf block | ||
182 | * | ||
183 | * @handle: journal handle | ||
184 | * @orig_inode: original inode | ||
185 | * @o_start: first original extent to be changed | ||
186 | * @o_end: last original extent to be changed | ||
187 | * @start_ext: first new extent to be inserted | ||
188 | * @new_ext: middle of new extent to be inserted | ||
189 | * @end_ext: last new extent to be inserted | ||
190 | * | ||
191 | * Allocate a new leaf block and insert extents into it. Return 0 on success, | ||
192 | * or a negative error value on failure. | ||
193 | */ | ||
194 | static int | ||
195 | mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | ||
196 | struct ext4_extent *o_start, struct ext4_extent *o_end, | ||
197 | struct ext4_extent *start_ext, struct ext4_extent *new_ext, | ||
198 | struct ext4_extent *end_ext) | ||
199 | { | ||
200 | struct ext4_ext_path *orig_path = NULL; | ||
201 | ext4_lblk_t eblock = 0; | ||
202 | int new_flag = 0; | ||
203 | int end_flag = 0; | ||
204 | int err = 0; | ||
205 | |||
206 | if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) { | ||
207 | if (o_start == o_end) { | ||
208 | |||
209 | /* start_ext new_ext end_ext | ||
210 | * donor |---------|-----------|--------| | ||
211 | * orig |------------------------------| | ||
212 | */ | ||
213 | end_flag = 1; | ||
214 | } else { | ||
215 | |||
216 | /* start_ext new_ext end_ext | ||
217 | * donor |---------|----------|---------| | ||
218 | * orig |---------------|--------------| | ||
219 | */ | ||
220 | o_end->ee_block = end_ext->ee_block; | ||
221 | o_end->ee_len = end_ext->ee_len; | ||
222 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); | ||
223 | } | ||
224 | |||
225 | o_start->ee_len = start_ext->ee_len; | ||
226 | eblock = le32_to_cpu(start_ext->ee_block); | ||
227 | new_flag = 1; | ||
228 | |||
229 | } else if (start_ext->ee_len && new_ext->ee_len && | ||
230 | !end_ext->ee_len && o_start == o_end) { | ||
231 | |||
232 | /* start_ext new_ext | ||
233 | * donor |--------------|---------------| | ||
234 | * orig |------------------------------| | ||
235 | */ | ||
236 | o_start->ee_len = start_ext->ee_len; | ||
237 | eblock = le32_to_cpu(start_ext->ee_block); | ||
238 | new_flag = 1; | ||
239 | |||
240 | } else if (!start_ext->ee_len && new_ext->ee_len && | ||
241 | end_ext->ee_len && o_start == o_end) { | ||
242 | |||
243 | /* new_ext end_ext | ||
244 | * donor |--------------|---------------| | ||
245 | * orig |------------------------------| | ||
246 | */ | ||
247 | o_end->ee_block = end_ext->ee_block; | ||
248 | o_end->ee_len = end_ext->ee_len; | ||
249 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); | ||
250 | |||
251 | /* | ||
252 | * Set 0 to the extent block if new_ext was | ||
253 | * the first block. | ||
254 | */ | ||
255 | if (new_ext->ee_block) | ||
256 | eblock = le32_to_cpu(new_ext->ee_block); | ||
257 | |||
258 | new_flag = 1; | ||
259 | } else { | ||
260 | ext4_debug("ext4 move extent: Unexpected insert case\n"); | ||
261 | return -EIO; | ||
262 | } | ||
263 | |||
264 | if (new_flag) { | ||
265 | err = get_ext_path(orig_inode, eblock, &orig_path); | ||
266 | if (err) | ||
267 | goto out; | ||
268 | |||
269 | if (ext4_ext_insert_extent(handle, orig_inode, | ||
270 | orig_path, new_ext, 0)) | ||
271 | goto out; | ||
272 | } | ||
273 | |||
274 | if (end_flag) { | ||
275 | err = get_ext_path(orig_inode, | ||
276 | le32_to_cpu(end_ext->ee_block) - 1, &orig_path); | ||
277 | if (err) | ||
278 | goto out; | ||
279 | |||
280 | if (ext4_ext_insert_extent(handle, orig_inode, | ||
281 | orig_path, end_ext, 0)) | ||
282 | goto out; | ||
283 | } | ||
284 | out: | ||
285 | if (orig_path) { | ||
286 | ext4_ext_drop_refs(orig_path); | ||
287 | kfree(orig_path); | ||
288 | } | ||
289 | |||
290 | return err; | ||
291 | |||
292 | } | ||
293 | |||
294 | /** | ||
295 | * mext_insert_inside_block - Insert new extent to the extent block | ||
296 | * | ||
297 | * @o_start: first original extent to be moved | ||
298 | * @o_end: last original extent to be moved | ||
299 | * @start_ext: first new extent to be inserted | ||
300 | * @new_ext: middle of new extent to be inserted | ||
301 | * @end_ext: last new extent to be inserted | ||
302 | * @eh: extent header of target leaf block | ||
303 | * @range_to_move: used to decide how to insert extent | ||
304 | * | ||
305 | * Insert extents into the leaf block. The extent (@o_start) is overwritten | ||
306 | * by inserted extents. | ||
307 | */ | ||
308 | static void | ||
309 | mext_insert_inside_block(struct ext4_extent *o_start, | ||
310 | struct ext4_extent *o_end, | ||
311 | struct ext4_extent *start_ext, | ||
312 | struct ext4_extent *new_ext, | ||
313 | struct ext4_extent *end_ext, | ||
314 | struct ext4_extent_header *eh, | ||
315 | int range_to_move) | ||
316 | { | ||
317 | int i = 0; | ||
318 | unsigned long len; | ||
319 | |||
320 | /* Move the existing extents */ | ||
321 | if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) { | ||
322 | len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) - | ||
323 | (unsigned long)(o_end + 1); | ||
324 | memmove(o_end + 1 + range_to_move, o_end + 1, len); | ||
325 | } | ||
326 | |||
327 | /* Insert start entry */ | ||
328 | if (start_ext->ee_len) | ||
329 | o_start[i++].ee_len = start_ext->ee_len; | ||
330 | |||
331 | /* Insert new entry */ | ||
332 | if (new_ext->ee_len) { | ||
333 | o_start[i] = *new_ext; | ||
334 | ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); | ||
335 | } | ||
336 | |||
337 | /* Insert end entry */ | ||
338 | if (end_ext->ee_len) | ||
339 | o_start[i] = *end_ext; | ||
340 | |||
341 | /* Increment the total entries counter on the extent block */ | ||
342 | le16_add_cpu(&eh->eh_entries, range_to_move); | ||
343 | } | ||
344 | |||
345 | /** | ||
346 | * mext_insert_extents - Insert new extent | ||
347 | * | ||
348 | * @handle: journal handle | ||
349 | * @orig_inode: original inode | ||
350 | * @orig_path: path indicates first extent to be changed | ||
351 | * @o_start: first original extent to be changed | ||
352 | * @o_end: last original extent to be changed | ||
353 | * @start_ext: first new extent to be inserted | ||
354 | * @new_ext: middle of new extent to be inserted | ||
355 | * @end_ext: last new extent to be inserted | ||
356 | * | ||
357 | * Call the function to insert extents. If we cannot add more extents into | ||
358 | * the leaf block, we call mext_insert_across_blocks() to create a | ||
359 | * new leaf block. Otherwise call mext_insert_inside_block(). Return 0 | ||
360 | * on success, or a negative error value on failure. | ||
361 | */ | ||
362 | static int | ||
363 | mext_insert_extents(handle_t *handle, struct inode *orig_inode, | ||
364 | struct ext4_ext_path *orig_path, | ||
365 | struct ext4_extent *o_start, | ||
366 | struct ext4_extent *o_end, | ||
367 | struct ext4_extent *start_ext, | ||
368 | struct ext4_extent *new_ext, | ||
369 | struct ext4_extent *end_ext) | ||
370 | { | ||
371 | struct ext4_extent_header *eh; | ||
372 | unsigned long need_slots, slots_range; | ||
373 | int range_to_move, depth, ret; | ||
374 | |||
375 | /* | ||
376 | * The extents need to be inserted | ||
377 | * start_extent + new_extent + end_extent. | ||
378 | */ | ||
379 | need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) + | ||
380 | (new_ext->ee_len ? 1 : 0); | ||
381 | |||
382 | /* The number of slots between start and end */ | ||
383 | slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1) | ||
384 | / sizeof(struct ext4_extent); | ||
385 | |||
386 | /* Range to move the end of extent */ | ||
387 | range_to_move = need_slots - slots_range; | ||
388 | depth = orig_path->p_depth; | ||
389 | orig_path += depth; | ||
390 | eh = orig_path->p_hdr; | ||
391 | |||
392 | if (depth) { | ||
393 | /* Register to journal */ | ||
394 | BUFFER_TRACE(orig_path->p_bh, "get_write_access"); | ||
395 | ret = ext4_journal_get_write_access(handle, orig_path->p_bh); | ||
396 | if (ret) | ||
397 | return ret; | ||
398 | } | ||
399 | |||
400 | /* Expansion */ | ||
401 | if (range_to_move > 0 && | ||
402 | (range_to_move > le16_to_cpu(eh->eh_max) | ||
403 | - le16_to_cpu(eh->eh_entries))) { | ||
404 | |||
405 | ret = mext_insert_across_blocks(handle, orig_inode, o_start, | ||
406 | o_end, start_ext, new_ext, end_ext); | ||
407 | if (ret < 0) | ||
408 | return ret; | ||
409 | } else | ||
410 | mext_insert_inside_block(o_start, o_end, start_ext, new_ext, | ||
411 | end_ext, eh, range_to_move); | ||
412 | |||
413 | return ext4_ext_dirty(handle, orig_inode, orig_path); | ||
414 | } | ||
415 | |||
416 | /** | ||
417 | * mext_leaf_block - Move one leaf extent block into the inode. | ||
418 | * | ||
419 | * @handle: journal handle | ||
420 | * @orig_inode: original inode | ||
421 | * @orig_path: path indicates first extent to be changed | ||
422 | * @dext: donor extent | ||
423 | * @from: start offset on the target file | ||
424 | * | ||
425 | * In order to insert extents into the leaf block, we must divide the extent | ||
426 | * in the leaf block into three extents. The one is located to be inserted | ||
427 | * extents, and the others are located around it. | ||
428 | * | ||
429 | * Therefore, this function creates structures to save extents of the leaf | ||
430 | * block, and inserts extents by calling mext_insert_extents() with | ||
431 | * created extents. Return 0 on success, or a negative error value on failure. | ||
432 | */ | ||
433 | static int | ||
434 | mext_leaf_block(handle_t *handle, struct inode *orig_inode, | ||
435 | struct ext4_ext_path *orig_path, struct ext4_extent *dext, | ||
436 | ext4_lblk_t *from) | ||
437 | { | ||
438 | struct ext4_extent *oext, *o_start, *o_end, *prev_ext; | ||
439 | struct ext4_extent new_ext, start_ext, end_ext; | ||
440 | ext4_lblk_t new_ext_end; | ||
441 | int oext_alen, new_ext_alen, end_ext_alen; | ||
442 | int depth = ext_depth(orig_inode); | ||
443 | int ret; | ||
444 | |||
445 | start_ext.ee_block = end_ext.ee_block = 0; | ||
446 | o_start = o_end = oext = orig_path[depth].p_ext; | ||
447 | oext_alen = ext4_ext_get_actual_len(oext); | ||
448 | start_ext.ee_len = end_ext.ee_len = 0; | ||
449 | |||
450 | new_ext.ee_block = cpu_to_le32(*from); | ||
451 | ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); | ||
452 | new_ext.ee_len = dext->ee_len; | ||
453 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | ||
454 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | ||
455 | |||
456 | /* | ||
457 | * Case: original extent is first | ||
458 | * oext |--------| | ||
459 | * new_ext |--| | ||
460 | * start_ext |--| | ||
461 | */ | ||
462 | if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) && | ||
463 | le32_to_cpu(new_ext.ee_block) < | ||
464 | le32_to_cpu(oext->ee_block) + oext_alen) { | ||
465 | start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - | ||
466 | le32_to_cpu(oext->ee_block)); | ||
467 | start_ext.ee_block = oext->ee_block; | ||
468 | copy_extent_status(oext, &start_ext); | ||
469 | } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { | ||
470 | prev_ext = oext - 1; | ||
471 | /* | ||
472 | * We can merge new_ext into previous extent, | ||
473 | * if these are contiguous and same extent type. | ||
474 | */ | ||
475 | if (ext4_can_extents_be_merged(orig_inode, prev_ext, | ||
476 | &new_ext)) { | ||
477 | o_start = prev_ext; | ||
478 | start_ext.ee_len = cpu_to_le16( | ||
479 | ext4_ext_get_actual_len(prev_ext) + | ||
480 | new_ext_alen); | ||
481 | start_ext.ee_block = oext->ee_block; | ||
482 | copy_extent_status(prev_ext, &start_ext); | ||
483 | new_ext.ee_len = 0; | ||
484 | } | ||
485 | } | ||
486 | |||
487 | /* | ||
488 | * Case: new_ext_end must be less than oext | ||
489 | * oext |-----------| | ||
490 | * new_ext |-------| | ||
491 | */ | ||
492 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { | ||
493 | EXT4_ERROR_INODE(orig_inode, | ||
494 | "new_ext_end(%u) should be less than or equal to " | ||
495 | "oext->ee_block(%u) + oext_alen(%d) - 1", | ||
496 | new_ext_end, le32_to_cpu(oext->ee_block), | ||
497 | oext_alen); | ||
498 | ret = -EIO; | ||
499 | goto out; | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Case: new_ext is smaller than original extent | ||
504 | * oext |---------------| | ||
505 | * new_ext |-----------| | ||
506 | * end_ext |---| | ||
507 | */ | ||
508 | if (le32_to_cpu(oext->ee_block) <= new_ext_end && | ||
509 | new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) { | ||
510 | end_ext.ee_len = | ||
511 | cpu_to_le16(le32_to_cpu(oext->ee_block) + | ||
512 | oext_alen - 1 - new_ext_end); | ||
513 | copy_extent_status(oext, &end_ext); | ||
514 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); | ||
515 | ext4_ext_store_pblock(&end_ext, | ||
516 | (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); | ||
517 | end_ext.ee_block = | ||
518 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + | ||
519 | oext_alen - end_ext_alen); | ||
520 | } | ||
521 | |||
522 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, | ||
523 | o_end, &start_ext, &new_ext, &end_ext); | ||
524 | out: | ||
525 | return ret; | ||
526 | } | ||
527 | |||
528 | /** | ||
529 | * mext_calc_swap_extents - Calculate extents for extent swapping. | ||
530 | * | ||
531 | * @tmp_dext: the extent that will belong to the original inode | ||
532 | * @tmp_oext: the extent that will belong to the donor inode | ||
533 | * @orig_off: block offset of original inode | ||
534 | * @donor_off: block offset of donor inode | ||
535 | * @max_count: the maximum length of extents | ||
536 | * | ||
537 | * Return 0 on success, or a negative error value on failure. | ||
538 | */ | ||
539 | static int | ||
540 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, | ||
541 | struct ext4_extent *tmp_oext, | ||
542 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, | ||
543 | ext4_lblk_t max_count) | ||
544 | { | ||
545 | ext4_lblk_t diff, orig_diff; | ||
546 | struct ext4_extent dext_old, oext_old; | ||
547 | |||
548 | BUG_ON(orig_off != donor_off); | ||
549 | |||
550 | /* original and donor extents have to cover the same block offset */ | ||
551 | if (orig_off < le32_to_cpu(tmp_oext->ee_block) || | ||
552 | le32_to_cpu(tmp_oext->ee_block) + | ||
553 | ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) | ||
554 | return -ENODATA; | ||
555 | |||
556 | if (orig_off < le32_to_cpu(tmp_dext->ee_block) || | ||
557 | le32_to_cpu(tmp_dext->ee_block) + | ||
558 | ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) | ||
559 | return -ENODATA; | ||
560 | |||
561 | dext_old = *tmp_dext; | ||
562 | oext_old = *tmp_oext; | ||
563 | |||
564 | /* When tmp_dext is too large, pick up the target range. */ | ||
565 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | ||
566 | |||
567 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); | ||
568 | le32_add_cpu(&tmp_dext->ee_block, diff); | ||
569 | le16_add_cpu(&tmp_dext->ee_len, -diff); | ||
570 | |||
571 | if (max_count < ext4_ext_get_actual_len(tmp_dext)) | ||
572 | tmp_dext->ee_len = cpu_to_le16(max_count); | ||
573 | |||
574 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); | ||
575 | ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); | ||
576 | |||
577 | /* Adjust extent length if donor extent is larger than orig */ | ||
578 | if (ext4_ext_get_actual_len(tmp_dext) > | ||
579 | ext4_ext_get_actual_len(tmp_oext) - orig_diff) | ||
580 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) - | ||
581 | orig_diff); | ||
582 | |||
583 | tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext)); | ||
584 | |||
585 | copy_extent_status(&oext_old, tmp_dext); | ||
586 | copy_extent_status(&dext_old, tmp_oext); | ||
587 | |||
588 | return 0; | ||
589 | } | ||
590 | |||
591 | /** | ||
592 | * mext_check_coverage - Check that all extents in range has the same type | 87 | * mext_check_coverage - Check that all extents in range has the same type |
593 | * | 88 | * |
594 | * @inode: inode in question | 89 | * @inode: inode in question |
@@ -619,171 +114,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, | |||
619 | } | 114 | } |
620 | ret = 1; | 115 | ret = 1; |
621 | out: | 116 | out: |
622 | if (path) { | 117 | ext4_ext_drop_refs(path); |
623 | ext4_ext_drop_refs(path); | 118 | kfree(path); |
624 | kfree(path); | ||
625 | } | ||
626 | return ret; | 119 | return ret; |
627 | } | 120 | } |
628 | 121 | ||
629 | /** | 122 | /** |
630 | * mext_replace_branches - Replace original extents with new extents | ||
631 | * | ||
632 | * @handle: journal handle | ||
633 | * @orig_inode: original inode | ||
634 | * @donor_inode: donor inode | ||
635 | * @from: block offset of orig_inode | ||
636 | * @count: block count to be replaced | ||
637 | * @err: pointer to save return value | ||
638 | * | ||
639 | * Replace original inode extents and donor inode extents page by page. | ||
640 | * We implement this replacement in the following three steps: | ||
641 | * 1. Save the block information of original and donor inodes into | ||
642 | * dummy extents. | ||
643 | * 2. Change the block information of original inode to point at the | ||
644 | * donor inode blocks. | ||
645 | * 3. Change the block information of donor inode to point at the saved | ||
646 | * original inode blocks in the dummy extents. | ||
647 | * | ||
648 | * Return replaced block count. | ||
649 | */ | ||
650 | static int | ||
651 | mext_replace_branches(handle_t *handle, struct inode *orig_inode, | ||
652 | struct inode *donor_inode, ext4_lblk_t from, | ||
653 | ext4_lblk_t count, int *err) | ||
654 | { | ||
655 | struct ext4_ext_path *orig_path = NULL; | ||
656 | struct ext4_ext_path *donor_path = NULL; | ||
657 | struct ext4_extent *oext, *dext; | ||
658 | struct ext4_extent tmp_dext, tmp_oext; | ||
659 | ext4_lblk_t orig_off = from, donor_off = from; | ||
660 | int depth; | ||
661 | int replaced_count = 0; | ||
662 | int dext_alen; | ||
663 | |||
664 | *err = ext4_es_remove_extent(orig_inode, from, count); | ||
665 | if (*err) | ||
666 | goto out; | ||
667 | |||
668 | *err = ext4_es_remove_extent(donor_inode, from, count); | ||
669 | if (*err) | ||
670 | goto out; | ||
671 | |||
672 | /* Get the original extent for the block "orig_off" */ | ||
673 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | ||
674 | if (*err) | ||
675 | goto out; | ||
676 | |||
677 | /* Get the donor extent for the head */ | ||
678 | *err = get_ext_path(donor_inode, donor_off, &donor_path); | ||
679 | if (*err) | ||
680 | goto out; | ||
681 | depth = ext_depth(orig_inode); | ||
682 | oext = orig_path[depth].p_ext; | ||
683 | tmp_oext = *oext; | ||
684 | |||
685 | depth = ext_depth(donor_inode); | ||
686 | dext = donor_path[depth].p_ext; | ||
687 | if (unlikely(!dext)) | ||
688 | goto missing_donor_extent; | ||
689 | tmp_dext = *dext; | ||
690 | |||
691 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | ||
692 | donor_off, count); | ||
693 | if (*err) | ||
694 | goto out; | ||
695 | |||
696 | /* Loop for the donor extents */ | ||
697 | while (1) { | ||
698 | /* The extent for donor must be found. */ | ||
699 | if (unlikely(!dext)) { | ||
700 | missing_donor_extent: | ||
701 | EXT4_ERROR_INODE(donor_inode, | ||
702 | "The extent for donor must be found"); | ||
703 | *err = -EIO; | ||
704 | goto out; | ||
705 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | ||
706 | EXT4_ERROR_INODE(donor_inode, | ||
707 | "Donor offset(%u) and the first block of donor " | ||
708 | "extent(%u) should be equal", | ||
709 | donor_off, | ||
710 | le32_to_cpu(tmp_dext.ee_block)); | ||
711 | *err = -EIO; | ||
712 | goto out; | ||
713 | } | ||
714 | |||
715 | /* Set donor extent to orig extent */ | ||
716 | *err = mext_leaf_block(handle, orig_inode, | ||
717 | orig_path, &tmp_dext, &orig_off); | ||
718 | if (*err) | ||
719 | goto out; | ||
720 | |||
721 | /* Set orig extent to donor extent */ | ||
722 | *err = mext_leaf_block(handle, donor_inode, | ||
723 | donor_path, &tmp_oext, &donor_off); | ||
724 | if (*err) | ||
725 | goto out; | ||
726 | |||
727 | dext_alen = ext4_ext_get_actual_len(&tmp_dext); | ||
728 | replaced_count += dext_alen; | ||
729 | donor_off += dext_alen; | ||
730 | orig_off += dext_alen; | ||
731 | |||
732 | BUG_ON(replaced_count > count); | ||
733 | /* Already moved the expected blocks */ | ||
734 | if (replaced_count >= count) | ||
735 | break; | ||
736 | |||
737 | if (orig_path) | ||
738 | ext4_ext_drop_refs(orig_path); | ||
739 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | ||
740 | if (*err) | ||
741 | goto out; | ||
742 | depth = ext_depth(orig_inode); | ||
743 | oext = orig_path[depth].p_ext; | ||
744 | tmp_oext = *oext; | ||
745 | |||
746 | if (donor_path) | ||
747 | ext4_ext_drop_refs(donor_path); | ||
748 | *err = get_ext_path(donor_inode, donor_off, &donor_path); | ||
749 | if (*err) | ||
750 | goto out; | ||
751 | depth = ext_depth(donor_inode); | ||
752 | dext = donor_path[depth].p_ext; | ||
753 | tmp_dext = *dext; | ||
754 | |||
755 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | ||
756 | donor_off, count - replaced_count); | ||
757 | if (*err) | ||
758 | goto out; | ||
759 | } | ||
760 | |||
761 | out: | ||
762 | if (orig_path) { | ||
763 | ext4_ext_drop_refs(orig_path); | ||
764 | kfree(orig_path); | ||
765 | } | ||
766 | if (donor_path) { | ||
767 | ext4_ext_drop_refs(donor_path); | ||
768 | kfree(donor_path); | ||
769 | } | ||
770 | |||
771 | return replaced_count; | ||
772 | } | ||
773 | |||
774 | /** | ||
775 | * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 | 123 | * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 |
776 | * | 124 | * |
777 | * @inode1: the inode structure | 125 | * @inode1: the inode structure |
778 | * @inode2: the inode structure | 126 | * @inode2: the inode structure |
779 | * @index: page index | 127 | * @index1: page index |
128 | * @index2: page index | ||
780 | * @page: result page vector | 129 | * @page: result page vector |
781 | * | 130 | * |
782 | * Grab two locked pages for inode's by inode order | 131 | * Grab two locked pages for inode's by inode order |
783 | */ | 132 | */ |
784 | static int | 133 | static int |
785 | mext_page_double_lock(struct inode *inode1, struct inode *inode2, | 134 | mext_page_double_lock(struct inode *inode1, struct inode *inode2, |
786 | pgoff_t index, struct page *page[2]) | 135 | pgoff_t index1, pgoff_t index2, struct page *page[2]) |
787 | { | 136 | { |
788 | struct address_space *mapping[2]; | 137 | struct address_space *mapping[2]; |
789 | unsigned fl = AOP_FLAG_NOFS; | 138 | unsigned fl = AOP_FLAG_NOFS; |
@@ -793,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, | |||
793 | mapping[0] = inode1->i_mapping; | 142 | mapping[0] = inode1->i_mapping; |
794 | mapping[1] = inode2->i_mapping; | 143 | mapping[1] = inode2->i_mapping; |
795 | } else { | 144 | } else { |
145 | pgoff_t tmp = index1; | ||
146 | index1 = index2; | ||
147 | index2 = tmp; | ||
796 | mapping[0] = inode2->i_mapping; | 148 | mapping[0] = inode2->i_mapping; |
797 | mapping[1] = inode1->i_mapping; | 149 | mapping[1] = inode1->i_mapping; |
798 | } | 150 | } |
799 | 151 | ||
800 | page[0] = grab_cache_page_write_begin(mapping[0], index, fl); | 152 | page[0] = grab_cache_page_write_begin(mapping[0], index1, fl); |
801 | if (!page[0]) | 153 | if (!page[0]) |
802 | return -ENOMEM; | 154 | return -ENOMEM; |
803 | 155 | ||
804 | page[1] = grab_cache_page_write_begin(mapping[1], index, fl); | 156 | page[1] = grab_cache_page_write_begin(mapping[1], index2, fl); |
805 | if (!page[1]) { | 157 | if (!page[1]) { |
806 | unlock_page(page[0]); | 158 | unlock_page(page[0]); |
807 | page_cache_release(page[0]); | 159 | page_cache_release(page[0]); |
@@ -893,25 +245,27 @@ out: | |||
893 | * @o_filp: file structure of original file | 245 | * @o_filp: file structure of original file |
894 | * @donor_inode: donor inode | 246 | * @donor_inode: donor inode |
895 | * @orig_page_offset: page index on original file | 247 | * @orig_page_offset: page index on original file |
248 | * @donor_page_offset: page index on donor file | ||
896 | * @data_offset_in_page: block index where data swapping starts | 249 | * @data_offset_in_page: block index where data swapping starts |
897 | * @block_len_in_page: the number of blocks to be swapped | 250 | * @block_len_in_page: the number of blocks to be swapped |
898 | * @unwritten: orig extent is unwritten or not | 251 | * @unwritten: orig extent is unwritten or not |
899 | * @err: pointer to save return value | 252 | * @err: pointer to save return value |
900 | * | 253 | * |
901 | * Save the data in original inode blocks and replace original inode extents | 254 | * Save the data in original inode blocks and replace original inode extents |
902 | * with donor inode extents by calling mext_replace_branches(). | 255 | * with donor inode extents by calling ext4_swap_extents(). |
903 | * Finally, write out the saved data in new original inode blocks. Return | 256 | * Finally, write out the saved data in new original inode blocks. Return |
904 | * replaced block count. | 257 | * replaced block count. |
905 | */ | 258 | */ |
906 | static int | 259 | static int |
907 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | 260 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
908 | pgoff_t orig_page_offset, int data_offset_in_page, | 261 | pgoff_t orig_page_offset, pgoff_t donor_page_offset, |
909 | int block_len_in_page, int unwritten, int *err) | 262 | int data_offset_in_page, |
263 | int block_len_in_page, int unwritten, int *err) | ||
910 | { | 264 | { |
911 | struct inode *orig_inode = file_inode(o_filp); | 265 | struct inode *orig_inode = file_inode(o_filp); |
912 | struct page *pagep[2] = {NULL, NULL}; | 266 | struct page *pagep[2] = {NULL, NULL}; |
913 | handle_t *handle; | 267 | handle_t *handle; |
914 | ext4_lblk_t orig_blk_offset; | 268 | ext4_lblk_t orig_blk_offset, donor_blk_offset; |
915 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 269 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
916 | unsigned int w_flags = 0; | 270 | unsigned int w_flags = 0; |
917 | unsigned int tmp_data_size, data_size, replaced_size; | 271 | unsigned int tmp_data_size, data_size, replaced_size; |
@@ -939,6 +293,9 @@ again: | |||
939 | orig_blk_offset = orig_page_offset * blocks_per_page + | 293 | orig_blk_offset = orig_page_offset * blocks_per_page + |
940 | data_offset_in_page; | 294 | data_offset_in_page; |
941 | 295 | ||
296 | donor_blk_offset = donor_page_offset * blocks_per_page + | ||
297 | data_offset_in_page; | ||
298 | |||
942 | /* Calculate data_size */ | 299 | /* Calculate data_size */ |
943 | if ((orig_blk_offset + block_len_in_page - 1) == | 300 | if ((orig_blk_offset + block_len_in_page - 1) == |
944 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 301 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
@@ -959,7 +316,7 @@ again: | |||
959 | replaced_size = data_size; | 316 | replaced_size = data_size; |
960 | 317 | ||
961 | *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, | 318 | *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, |
962 | pagep); | 319 | donor_page_offset, pagep); |
963 | if (unlikely(*err < 0)) | 320 | if (unlikely(*err < 0)) |
964 | goto stop_journal; | 321 | goto stop_journal; |
965 | /* | 322 | /* |
@@ -978,7 +335,7 @@ again: | |||
978 | if (*err) | 335 | if (*err) |
979 | goto drop_data_sem; | 336 | goto drop_data_sem; |
980 | 337 | ||
981 | unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, | 338 | unwritten &= mext_check_coverage(donor_inode, donor_blk_offset, |
982 | block_len_in_page, 1, err); | 339 | block_len_in_page, 1, err); |
983 | if (*err) | 340 | if (*err) |
984 | goto drop_data_sem; | 341 | goto drop_data_sem; |
@@ -994,9 +351,10 @@ again: | |||
994 | *err = -EBUSY; | 351 | *err = -EBUSY; |
995 | goto drop_data_sem; | 352 | goto drop_data_sem; |
996 | } | 353 | } |
997 | replaced_count = mext_replace_branches(handle, orig_inode, | 354 | replaced_count = ext4_swap_extents(handle, orig_inode, |
998 | donor_inode, orig_blk_offset, | 355 | donor_inode, orig_blk_offset, |
999 | block_len_in_page, err); | 356 | donor_blk_offset, |
357 | block_len_in_page, 1, err); | ||
1000 | drop_data_sem: | 358 | drop_data_sem: |
1001 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 359 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
1002 | goto unlock_pages; | 360 | goto unlock_pages; |
@@ -1014,9 +372,9 @@ data_copy: | |||
1014 | goto unlock_pages; | 372 | goto unlock_pages; |
1015 | } | 373 | } |
1016 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 374 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
1017 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, | 375 | replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode, |
1018 | orig_blk_offset, | 376 | orig_blk_offset, donor_blk_offset, |
1019 | block_len_in_page, err); | 377 | block_len_in_page, 1, err); |
1020 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 378 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
1021 | if (*err) { | 379 | if (*err) { |
1022 | if (replaced_count) { | 380 | if (replaced_count) { |
@@ -1061,9 +419,9 @@ repair_branches: | |||
1061 | * Try to swap extents to it's original places | 419 | * Try to swap extents to it's original places |
1062 | */ | 420 | */ |
1063 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 421 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
1064 | replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, | 422 | replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode, |
1065 | orig_blk_offset, | 423 | orig_blk_offset, donor_blk_offset, |
1066 | block_len_in_page, &err2); | 424 | block_len_in_page, 0, &err2); |
1067 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 425 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
1068 | if (replaced_count != block_len_in_page) { | 426 | if (replaced_count != block_len_in_page) { |
1069 | EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), | 427 | EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), |
@@ -1093,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode, | |||
1093 | struct inode *donor_inode, __u64 orig_start, | 451 | struct inode *donor_inode, __u64 orig_start, |
1094 | __u64 donor_start, __u64 *len) | 452 | __u64 donor_start, __u64 *len) |
1095 | { | 453 | { |
1096 | ext4_lblk_t orig_blocks, donor_blocks; | 454 | __u64 orig_eof, donor_eof; |
1097 | unsigned int blkbits = orig_inode->i_blkbits; | 455 | unsigned int blkbits = orig_inode->i_blkbits; |
1098 | unsigned int blocksize = 1 << blkbits; | 456 | unsigned int blocksize = 1 << blkbits; |
1099 | 457 | ||
458 | orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits; | ||
459 | donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits; | ||
460 | |||
461 | |||
1100 | if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { | 462 | if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { |
1101 | ext4_debug("ext4 move extent: suid or sgid is set" | 463 | ext4_debug("ext4 move extent: suid or sgid is set" |
1102 | " to donor file [ino:orig %lu, donor %lu]\n", | 464 | " to donor file [ino:orig %lu, donor %lu]\n", |
@@ -1112,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode, | |||
1112 | ext4_debug("ext4 move extent: The argument files should " | 474 | ext4_debug("ext4 move extent: The argument files should " |
1113 | "not be swapfile [ino:orig %lu, donor %lu]\n", | 475 | "not be swapfile [ino:orig %lu, donor %lu]\n", |
1114 | orig_inode->i_ino, donor_inode->i_ino); | 476 | orig_inode->i_ino, donor_inode->i_ino); |
1115 | return -EINVAL; | 477 | return -EBUSY; |
1116 | } | 478 | } |
1117 | 479 | ||
1118 | /* Ext4 move extent supports only extent based file */ | 480 | /* Ext4 move extent supports only extent based file */ |
@@ -1132,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode, | |||
1132 | } | 494 | } |
1133 | 495 | ||
1134 | /* Start offset should be same */ | 496 | /* Start offset should be same */ |
1135 | if (orig_start != donor_start) { | 497 | if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) != |
498 | (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) { | ||
1136 | ext4_debug("ext4 move extent: orig and donor's start " | 499 | ext4_debug("ext4 move extent: orig and donor's start " |
1137 | "offset are not same [ino:orig %lu, donor %lu]\n", | 500 | "offset are not alligned [ino:orig %lu, donor %lu]\n", |
1138 | orig_inode->i_ino, donor_inode->i_ino); | 501 | orig_inode->i_ino, donor_inode->i_ino); |
1139 | return -EINVAL; | 502 | return -EINVAL; |
1140 | } | 503 | } |
1141 | 504 | ||
1142 | if ((orig_start >= EXT_MAX_BLOCKS) || | 505 | if ((orig_start >= EXT_MAX_BLOCKS) || |
506 | (donor_start >= EXT_MAX_BLOCKS) || | ||
1143 | (*len > EXT_MAX_BLOCKS) || | 507 | (*len > EXT_MAX_BLOCKS) || |
508 | (donor_start + *len >= EXT_MAX_BLOCKS) || | ||
1144 | (orig_start + *len >= EXT_MAX_BLOCKS)) { | 509 | (orig_start + *len >= EXT_MAX_BLOCKS)) { |
1145 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " | 510 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
1146 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, | 511 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, |
1147 | orig_inode->i_ino, donor_inode->i_ino); | 512 | orig_inode->i_ino, donor_inode->i_ino); |
1148 | return -EINVAL; | 513 | return -EINVAL; |
1149 | } | 514 | } |
1150 | 515 | if (orig_eof < orig_start + *len - 1) | |
1151 | if (orig_inode->i_size > donor_inode->i_size) { | 516 | *len = orig_eof - orig_start; |
1152 | donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; | 517 | if (donor_eof < donor_start + *len - 1) |
1153 | /* TODO: eliminate this artificial restriction */ | 518 | *len = donor_eof - donor_start; |
1154 | if (orig_start >= donor_blocks) { | ||
1155 | ext4_debug("ext4 move extent: orig start offset " | ||
1156 | "[%llu] should be less than donor file blocks " | ||
1157 | "[%u] [ino:orig %lu, donor %lu]\n", | ||
1158 | orig_start, donor_blocks, | ||
1159 | orig_inode->i_ino, donor_inode->i_ino); | ||
1160 | return -EINVAL; | ||
1161 | } | ||
1162 | |||
1163 | /* TODO: eliminate this artificial restriction */ | ||
1164 | if (orig_start + *len > donor_blocks) { | ||
1165 | ext4_debug("ext4 move extent: End offset [%llu] should " | ||
1166 | "be less than donor file blocks [%u]." | ||
1167 | "So adjust length from %llu to %llu " | ||
1168 | "[ino:orig %lu, donor %lu]\n", | ||
1169 | orig_start + *len, donor_blocks, | ||
1170 | *len, donor_blocks - orig_start, | ||
1171 | orig_inode->i_ino, donor_inode->i_ino); | ||
1172 | *len = donor_blocks - orig_start; | ||
1173 | } | ||
1174 | } else { | ||
1175 | orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; | ||
1176 | if (orig_start >= orig_blocks) { | ||
1177 | ext4_debug("ext4 move extent: start offset [%llu] " | ||
1178 | "should be less than original file blocks " | ||
1179 | "[%u] [ino:orig %lu, donor %lu]\n", | ||
1180 | orig_start, orig_blocks, | ||
1181 | orig_inode->i_ino, donor_inode->i_ino); | ||
1182 | return -EINVAL; | ||
1183 | } | ||
1184 | |||
1185 | if (orig_start + *len > orig_blocks) { | ||
1186 | ext4_debug("ext4 move extent: Adjust length " | ||
1187 | "from %llu to %llu. Because it should be " | ||
1188 | "less than original file blocks " | ||
1189 | "[ino:orig %lu, donor %lu]\n", | ||
1190 | *len, orig_blocks - orig_start, | ||
1191 | orig_inode->i_ino, donor_inode->i_ino); | ||
1192 | *len = orig_blocks - orig_start; | ||
1193 | } | ||
1194 | } | ||
1195 | |||
1196 | if (!*len) { | 519 | if (!*len) { |
1197 | ext4_debug("ext4 move extent: len should not be 0 " | 520 | ext4_debug("ext4 move extent: len should not be 0 " |
1198 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | 521 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, |
@@ -1208,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode, | |||
1208 | * | 531 | * |
1209 | * @o_filp: file structure of the original file | 532 | * @o_filp: file structure of the original file |
1210 | * @d_filp: file structure of the donor file | 533 | * @d_filp: file structure of the donor file |
1211 | * @orig_start: start offset in block for orig | 534 | * @orig_blk: start offset in block for orig |
1212 | * @donor_start: start offset in block for donor | 535 | * @donor_blk: start offset in block for donor |
1213 | * @len: the number of blocks to be moved | 536 | * @len: the number of blocks to be moved |
1214 | * @moved_len: moved block length | 537 | * @moved_len: moved block length |
1215 | * | 538 | * |
1216 | * This function returns 0 and moved block length is set in moved_len | 539 | * This function returns 0 and moved block length is set in moved_len |
1217 | * if succeed, otherwise returns error value. | 540 | * if succeed, otherwise returns error value. |
1218 | * | 541 | * |
1219 | * Note: ext4_move_extents() proceeds the following order. | ||
1220 | * 1:ext4_move_extents() calculates the last block number of moving extent | ||
1221 | * function by the start block number (orig_start) and the number of blocks | ||
1222 | * to be moved (len) specified as arguments. | ||
1223 | * If the {orig, donor}_start points a hole, the extent's start offset | ||
1224 | * pointed by ext_cur (current extent), holecheck_path, orig_path are set | ||
1225 | * after hole behind. | ||
1226 | * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent | ||
1227 | * or the ext_cur exceeds the block_end which is last logical block number. | ||
1228 | * 3:To get the length of continues area, call mext_next_extent() | ||
1229 | * specified with the ext_cur (initial value is holecheck_path) re-cursive, | ||
1230 | * until find un-continuous extent, the start logical block number exceeds | ||
1231 | * the block_end or the extent points to the last extent. | ||
1232 | * 4:Exchange the original inode data with donor inode data | ||
1233 | * from orig_page_offset to seq_end_page. | ||
1234 | * The start indexes of data are specified as arguments. | ||
1235 | * That of the original inode is orig_page_offset, | ||
1236 | * and the donor inode is also orig_page_offset | ||
1237 | * (To easily handle blocksize != pagesize case, the offset for the | ||
1238 | * donor inode is block unit). | ||
1239 | * 5:Update holecheck_path and orig_path to points a next proceeding extent, | ||
1240 | * then returns to step 2. | ||
1241 | * 6:Release holecheck_path, orig_path and set the len to moved_len | ||
1242 | * which shows the number of moved blocks. | ||
1243 | * The moved_len is useful for the command to calculate the file offset | ||
1244 | * for starting next move extent ioctl. | ||
1245 | * 7:Return 0 on success, or a negative error value on failure. | ||
1246 | */ | 542 | */ |
1247 | int | 543 | int |
1248 | ext4_move_extents(struct file *o_filp, struct file *d_filp, | 544 | ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, |
1249 | __u64 orig_start, __u64 donor_start, __u64 len, | 545 | __u64 donor_blk, __u64 len, __u64 *moved_len) |
1250 | __u64 *moved_len) | ||
1251 | { | 546 | { |
1252 | struct inode *orig_inode = file_inode(o_filp); | 547 | struct inode *orig_inode = file_inode(o_filp); |
1253 | struct inode *donor_inode = file_inode(d_filp); | 548 | struct inode *donor_inode = file_inode(d_filp); |
1254 | struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; | 549 | struct ext4_ext_path *path = NULL; |
1255 | struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; | ||
1256 | ext4_lblk_t block_start = orig_start; | ||
1257 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | ||
1258 | ext4_lblk_t rest_blocks; | ||
1259 | pgoff_t orig_page_offset = 0, seq_end_page; | ||
1260 | int ret, depth, last_extent = 0; | ||
1261 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 550 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
1262 | int data_offset_in_page; | 551 | ext4_lblk_t o_end, o_start = orig_blk; |
1263 | int block_len_in_page; | 552 | ext4_lblk_t d_start = donor_blk; |
1264 | int unwritten; | 553 | int ret; |
1265 | 554 | ||
1266 | if (orig_inode->i_sb != donor_inode->i_sb) { | 555 | if (orig_inode->i_sb != donor_inode->i_sb) { |
1267 | ext4_debug("ext4 move extent: The argument files " | 556 | ext4_debug("ext4 move extent: The argument files " |
@@ -1303,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1303 | /* Protect extent tree against block allocations via delalloc */ | 592 | /* Protect extent tree against block allocations via delalloc */ |
1304 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 593 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
1305 | /* Check the filesystem environment whether move_extent can be done */ | 594 | /* Check the filesystem environment whether move_extent can be done */ |
1306 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | 595 | ret = mext_check_arguments(orig_inode, donor_inode, orig_blk, |
1307 | donor_start, &len); | 596 | donor_blk, &len); |
1308 | if (ret) | 597 | if (ret) |
1309 | goto out; | 598 | goto out; |
599 | o_end = o_start + len; | ||
1310 | 600 | ||
1311 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 601 | while (o_start < o_end) { |
1312 | block_end = block_start + len - 1; | 602 | struct ext4_extent *ex; |
1313 | if (file_end < block_end) | 603 | ext4_lblk_t cur_blk, next_blk; |
1314 | len -= block_end - file_end; | 604 | pgoff_t orig_page_index, donor_page_index; |
605 | int offset_in_page; | ||
606 | int unwritten, cur_len; | ||
1315 | 607 | ||
1316 | ret = get_ext_path(orig_inode, block_start, &orig_path); | 608 | ret = get_ext_path(orig_inode, o_start, &path); |
1317 | if (ret) | 609 | if (ret) |
1318 | goto out; | ||
1319 | |||
1320 | /* Get path structure to check the hole */ | ||
1321 | ret = get_ext_path(orig_inode, block_start, &holecheck_path); | ||
1322 | if (ret) | ||
1323 | goto out; | ||
1324 | |||
1325 | depth = ext_depth(orig_inode); | ||
1326 | ext_cur = holecheck_path[depth].p_ext; | ||
1327 | |||
1328 | /* | ||
1329 | * Get proper starting location of block replacement if block_start was | ||
1330 | * within the hole. | ||
1331 | */ | ||
1332 | if (le32_to_cpu(ext_cur->ee_block) + | ||
1333 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { | ||
1334 | /* | ||
1335 | * The hole exists between extents or the tail of | ||
1336 | * original file. | ||
1337 | */ | ||
1338 | last_extent = mext_next_extent(orig_inode, | ||
1339 | holecheck_path, &ext_cur); | ||
1340 | if (last_extent < 0) { | ||
1341 | ret = last_extent; | ||
1342 | goto out; | ||
1343 | } | ||
1344 | last_extent = mext_next_extent(orig_inode, orig_path, | ||
1345 | &ext_dummy); | ||
1346 | if (last_extent < 0) { | ||
1347 | ret = last_extent; | ||
1348 | goto out; | 610 | goto out; |
1349 | } | 611 | ex = path[path->p_depth].p_ext; |
1350 | seq_start = le32_to_cpu(ext_cur->ee_block); | 612 | next_blk = ext4_ext_next_allocated_block(path); |
1351 | } else if (le32_to_cpu(ext_cur->ee_block) > block_start) | 613 | cur_blk = le32_to_cpu(ex->ee_block); |
1352 | /* The hole exists at the beginning of original file. */ | 614 | cur_len = ext4_ext_get_actual_len(ex); |
1353 | seq_start = le32_to_cpu(ext_cur->ee_block); | 615 | /* Check hole before the start pos */ |
1354 | else | 616 | if (cur_blk + cur_len - 1 < o_start) { |
1355 | seq_start = block_start; | 617 | if (next_blk == EXT_MAX_BLOCKS) { |
1356 | 618 | o_start = o_end; | |
1357 | /* No blocks within the specified range. */ | 619 | ret = -ENODATA; |
1358 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 620 | goto out; |
1359 | ext4_debug("ext4 move extent: The specified range of file " | 621 | } |
1360 | "may be the hole\n"); | 622 | d_start += next_blk - o_start; |
1361 | ret = -EINVAL; | 623 | o_start = next_blk; |
1362 | goto out; | ||
1363 | } | ||
1364 | |||
1365 | /* Adjust start blocks */ | ||
1366 | add_blocks = min(le32_to_cpu(ext_cur->ee_block) + | ||
1367 | ext4_ext_get_actual_len(ext_cur), block_end + 1) - | ||
1368 | max(le32_to_cpu(ext_cur->ee_block), block_start); | ||
1369 | |||
1370 | while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { | ||
1371 | seq_blocks += add_blocks; | ||
1372 | |||
1373 | /* Adjust tail blocks */ | ||
1374 | if (seq_start + seq_blocks - 1 > block_end) | ||
1375 | seq_blocks = block_end - seq_start + 1; | ||
1376 | |||
1377 | ext_prev = ext_cur; | ||
1378 | last_extent = mext_next_extent(orig_inode, holecheck_path, | ||
1379 | &ext_cur); | ||
1380 | if (last_extent < 0) { | ||
1381 | ret = last_extent; | ||
1382 | break; | ||
1383 | } | ||
1384 | add_blocks = ext4_ext_get_actual_len(ext_cur); | ||
1385 | |||
1386 | /* | ||
1387 | * Extend the length of contiguous block (seq_blocks) | ||
1388 | * if extents are contiguous. | ||
1389 | */ | ||
1390 | if (ext4_can_extents_be_merged(orig_inode, | ||
1391 | ext_prev, ext_cur) && | ||
1392 | block_end >= le32_to_cpu(ext_cur->ee_block) && | ||
1393 | !last_extent) | ||
1394 | continue; | 624 | continue; |
1395 | 625 | /* Check hole after the start pos */ | |
1396 | /* Is original extent is unwritten */ | 626 | } else if (cur_blk > o_start) { |
1397 | unwritten = ext4_ext_is_unwritten(ext_prev); | 627 | /* Skip hole */ |
1398 | 628 | d_start += cur_blk - o_start; | |
1399 | data_offset_in_page = seq_start % blocks_per_page; | 629 | o_start = cur_blk; |
1400 | 630 | /* Extent inside requested range ?*/ | |
1401 | /* | 631 | if (cur_blk >= o_end) |
1402 | * Calculate data blocks count that should be swapped | 632 | goto out; |
1403 | * at the first page. | 633 | } else { /* in_range(o_start, o_blk, o_len) */ |
1404 | */ | 634 | cur_len += cur_blk - o_start; |
1405 | if (data_offset_in_page + seq_blocks > blocks_per_page) { | ||
1406 | /* Swapped blocks are across pages */ | ||
1407 | block_len_in_page = | ||
1408 | blocks_per_page - data_offset_in_page; | ||
1409 | } else { | ||
1410 | /* Swapped blocks are in a page */ | ||
1411 | block_len_in_page = seq_blocks; | ||
1412 | } | 635 | } |
1413 | 636 | unwritten = ext4_ext_is_unwritten(ex); | |
1414 | orig_page_offset = seq_start >> | 637 | if (o_end - o_start < cur_len) |
1415 | (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); | 638 | cur_len = o_end - o_start; |
1416 | seq_end_page = (seq_start + seq_blocks - 1) >> | 639 | |
1417 | (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); | 640 | orig_page_index = o_start >> (PAGE_CACHE_SHIFT - |
1418 | seq_start = le32_to_cpu(ext_cur->ee_block); | 641 | orig_inode->i_blkbits); |
1419 | rest_blocks = seq_blocks; | 642 | donor_page_index = d_start >> (PAGE_CACHE_SHIFT - |
1420 | 643 | donor_inode->i_blkbits); | |
644 | offset_in_page = o_start % blocks_per_page; | ||
645 | if (cur_len > blocks_per_page- offset_in_page) | ||
646 | cur_len = blocks_per_page - offset_in_page; | ||
1421 | /* | 647 | /* |
1422 | * Up semaphore to avoid following problems: | 648 | * Up semaphore to avoid following problems: |
1423 | * a. transaction deadlock among ext4_journal_start, | 649 | * a. transaction deadlock among ext4_journal_start, |
@@ -1426,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1426 | * in move_extent_per_page | 652 | * in move_extent_per_page |
1427 | */ | 653 | */ |
1428 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 654 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
1429 | 655 | /* Swap original branches with new branches */ | |
1430 | while (orig_page_offset <= seq_end_page) { | 656 | move_extent_per_page(o_filp, donor_inode, |
1431 | 657 | orig_page_index, donor_page_index, | |
1432 | /* Swap original branches with new branches */ | 658 | offset_in_page, cur_len, |
1433 | block_len_in_page = move_extent_per_page( | 659 | unwritten, &ret); |
1434 | o_filp, donor_inode, | ||
1435 | orig_page_offset, | ||
1436 | data_offset_in_page, | ||
1437 | block_len_in_page, | ||
1438 | unwritten, &ret); | ||
1439 | |||
1440 | /* Count how many blocks we have exchanged */ | ||
1441 | *moved_len += block_len_in_page; | ||
1442 | if (ret < 0) | ||
1443 | break; | ||
1444 | if (*moved_len > len) { | ||
1445 | EXT4_ERROR_INODE(orig_inode, | ||
1446 | "We replaced blocks too much! " | ||
1447 | "sum of replaced: %llu requested: %llu", | ||
1448 | *moved_len, len); | ||
1449 | ret = -EIO; | ||
1450 | break; | ||
1451 | } | ||
1452 | |||
1453 | orig_page_offset++; | ||
1454 | data_offset_in_page = 0; | ||
1455 | rest_blocks -= block_len_in_page; | ||
1456 | if (rest_blocks > blocks_per_page) | ||
1457 | block_len_in_page = blocks_per_page; | ||
1458 | else | ||
1459 | block_len_in_page = rest_blocks; | ||
1460 | } | ||
1461 | |||
1462 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 660 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
1463 | if (ret < 0) | 661 | if (ret < 0) |
1464 | break; | 662 | break; |
1465 | 663 | o_start += cur_len; | |
1466 | /* Decrease buffer counter */ | 664 | d_start += cur_len; |
1467 | if (holecheck_path) | ||
1468 | ext4_ext_drop_refs(holecheck_path); | ||
1469 | ret = get_ext_path(orig_inode, seq_start, &holecheck_path); | ||
1470 | if (ret) | ||
1471 | break; | ||
1472 | depth = holecheck_path->p_depth; | ||
1473 | |||
1474 | /* Decrease buffer counter */ | ||
1475 | if (orig_path) | ||
1476 | ext4_ext_drop_refs(orig_path); | ||
1477 | ret = get_ext_path(orig_inode, seq_start, &orig_path); | ||
1478 | if (ret) | ||
1479 | break; | ||
1480 | |||
1481 | ext_cur = holecheck_path[depth].p_ext; | ||
1482 | add_blocks = ext4_ext_get_actual_len(ext_cur); | ||
1483 | seq_blocks = 0; | ||
1484 | |||
1485 | } | 665 | } |
666 | *moved_len = o_start - orig_blk; | ||
667 | if (*moved_len > len) | ||
668 | *moved_len = len; | ||
669 | |||
1486 | out: | 670 | out: |
1487 | if (*moved_len) { | 671 | if (*moved_len) { |
1488 | ext4_discard_preallocations(orig_inode); | 672 | ext4_discard_preallocations(orig_inode); |
1489 | ext4_discard_preallocations(donor_inode); | 673 | ext4_discard_preallocations(donor_inode); |
1490 | } | 674 | } |
1491 | 675 | ||
1492 | if (orig_path) { | 676 | ext4_ext_drop_refs(path); |
1493 | ext4_ext_drop_refs(orig_path); | 677 | kfree(path); |
1494 | kfree(orig_path); | ||
1495 | } | ||
1496 | if (holecheck_path) { | ||
1497 | ext4_ext_drop_refs(holecheck_path); | ||
1498 | kfree(holecheck_path); | ||
1499 | } | ||
1500 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 678 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
1501 | ext4_inode_resume_unlocked_dio(orig_inode); | 679 | ext4_inode_resume_unlocked_dio(orig_inode); |
1502 | ext4_inode_resume_unlocked_dio(donor_inode); | 680 | ext4_inode_resume_unlocked_dio(donor_inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 603e4ebbd0ac..426211882f72 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -53,7 +53,7 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
53 | ext4_lblk_t *block) | 53 | ext4_lblk_t *block) |
54 | { | 54 | { |
55 | struct buffer_head *bh; | 55 | struct buffer_head *bh; |
56 | int err = 0; | 56 | int err; |
57 | 57 | ||
58 | if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && | 58 | if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && |
59 | ((inode->i_size >> 10) >= | 59 | ((inode->i_size >> 10) >= |
@@ -62,9 +62,9 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
62 | 62 | ||
63 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; | 63 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; |
64 | 64 | ||
65 | bh = ext4_bread(handle, inode, *block, 1, &err); | 65 | bh = ext4_bread(handle, inode, *block, 1); |
66 | if (!bh) | 66 | if (IS_ERR(bh)) |
67 | return ERR_PTR(err); | 67 | return bh; |
68 | inode->i_size += inode->i_sb->s_blocksize; | 68 | inode->i_size += inode->i_sb->s_blocksize; |
69 | EXT4_I(inode)->i_disksize = inode->i_size; | 69 | EXT4_I(inode)->i_disksize = inode->i_size; |
70 | BUFFER_TRACE(bh, "get_write_access"); | 70 | BUFFER_TRACE(bh, "get_write_access"); |
@@ -94,20 +94,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, | |||
94 | { | 94 | { |
95 | struct buffer_head *bh; | 95 | struct buffer_head *bh; |
96 | struct ext4_dir_entry *dirent; | 96 | struct ext4_dir_entry *dirent; |
97 | int err = 0, is_dx_block = 0; | 97 | int is_dx_block = 0; |
98 | 98 | ||
99 | bh = ext4_bread(NULL, inode, block, 0, &err); | 99 | bh = ext4_bread(NULL, inode, block, 0); |
100 | if (!bh) { | 100 | if (IS_ERR(bh)) { |
101 | if (err == 0) { | ||
102 | ext4_error_inode(inode, __func__, line, block, | ||
103 | "Directory hole found"); | ||
104 | return ERR_PTR(-EIO); | ||
105 | } | ||
106 | __ext4_warning(inode->i_sb, __func__, line, | 101 | __ext4_warning(inode->i_sb, __func__, line, |
107 | "error reading directory block " | 102 | "error %ld reading directory block " |
108 | "(ino %lu, block %lu)", inode->i_ino, | 103 | "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino, |
109 | (unsigned long) block); | 104 | (unsigned long) block); |
110 | return ERR_PTR(err); | 105 | |
106 | return bh; | ||
107 | } | ||
108 | if (!bh) { | ||
109 | ext4_error_inode(inode, __func__, line, block, "Directory hole found"); | ||
110 | return ERR_PTR(-EIO); | ||
111 | } | 111 | } |
112 | dirent = (struct ext4_dir_entry *) bh->b_data; | 112 | dirent = (struct ext4_dir_entry *) bh->b_data; |
113 | /* Determine whether or not we have an index block */ | 113 | /* Determine whether or not we have an index block */ |
@@ -124,8 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, | |||
124 | "directory leaf block found instead of index block"); | 124 | "directory leaf block found instead of index block"); |
125 | return ERR_PTR(-EIO); | 125 | return ERR_PTR(-EIO); |
126 | } | 126 | } |
127 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 127 | if (!ext4_has_metadata_csum(inode->i_sb) || |
128 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) || | ||
129 | buffer_verified(bh)) | 128 | buffer_verified(bh)) |
130 | return bh; | 129 | return bh; |
131 | 130 | ||
@@ -253,8 +252,7 @@ static unsigned dx_node_limit(struct inode *dir); | |||
253 | static struct dx_frame *dx_probe(const struct qstr *d_name, | 252 | static struct dx_frame *dx_probe(const struct qstr *d_name, |
254 | struct inode *dir, | 253 | struct inode *dir, |
255 | struct dx_hash_info *hinfo, | 254 | struct dx_hash_info *hinfo, |
256 | struct dx_frame *frame, | 255 | struct dx_frame *frame); |
257 | int *err); | ||
258 | static void dx_release(struct dx_frame *frames); | 256 | static void dx_release(struct dx_frame *frames); |
259 | static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, | 257 | static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, |
260 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 258 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
@@ -270,8 +268,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
270 | __u32 *start_hash); | 268 | __u32 *start_hash); |
271 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, | 269 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, |
272 | const struct qstr *d_name, | 270 | const struct qstr *d_name, |
273 | struct ext4_dir_entry_2 **res_dir, | 271 | struct ext4_dir_entry_2 **res_dir); |
274 | int *err); | ||
275 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 272 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
276 | struct inode *inode); | 273 | struct inode *inode); |
277 | 274 | ||
@@ -340,8 +337,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) | |||
340 | { | 337 | { |
341 | struct ext4_dir_entry_tail *t; | 338 | struct ext4_dir_entry_tail *t; |
342 | 339 | ||
343 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 340 | if (!ext4_has_metadata_csum(inode->i_sb)) |
344 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
345 | return 1; | 341 | return 1; |
346 | 342 | ||
347 | t = get_dirent_tail(inode, dirent); | 343 | t = get_dirent_tail(inode, dirent); |
@@ -362,8 +358,7 @@ static void ext4_dirent_csum_set(struct inode *inode, | |||
362 | { | 358 | { |
363 | struct ext4_dir_entry_tail *t; | 359 | struct ext4_dir_entry_tail *t; |
364 | 360 | ||
365 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 361 | if (!ext4_has_metadata_csum(inode->i_sb)) |
366 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
367 | return; | 362 | return; |
368 | 363 | ||
369 | t = get_dirent_tail(inode, dirent); | 364 | t = get_dirent_tail(inode, dirent); |
@@ -438,8 +433,7 @@ static int ext4_dx_csum_verify(struct inode *inode, | |||
438 | struct dx_tail *t; | 433 | struct dx_tail *t; |
439 | int count_offset, limit, count; | 434 | int count_offset, limit, count; |
440 | 435 | ||
441 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 436 | if (!ext4_has_metadata_csum(inode->i_sb)) |
442 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
443 | return 1; | 437 | return 1; |
444 | 438 | ||
445 | c = get_dx_countlimit(inode, dirent, &count_offset); | 439 | c = get_dx_countlimit(inode, dirent, &count_offset); |
@@ -468,8 +462,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) | |||
468 | struct dx_tail *t; | 462 | struct dx_tail *t; |
469 | int count_offset, limit, count; | 463 | int count_offset, limit, count; |
470 | 464 | ||
471 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 465 | if (!ext4_has_metadata_csum(inode->i_sb)) |
472 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
473 | return; | 466 | return; |
474 | 467 | ||
475 | c = get_dx_countlimit(inode, dirent, &count_offset); | 468 | c = get_dx_countlimit(inode, dirent, &count_offset); |
@@ -557,8 +550,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) | |||
557 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 550 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
558 | EXT4_DIR_REC_LEN(2) - infosize; | 551 | EXT4_DIR_REC_LEN(2) - infosize; |
559 | 552 | ||
560 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 553 | if (ext4_has_metadata_csum(dir->i_sb)) |
561 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
562 | entry_space -= sizeof(struct dx_tail); | 554 | entry_space -= sizeof(struct dx_tail); |
563 | return entry_space / sizeof(struct dx_entry); | 555 | return entry_space / sizeof(struct dx_entry); |
564 | } | 556 | } |
@@ -567,8 +559,7 @@ static inline unsigned dx_node_limit(struct inode *dir) | |||
567 | { | 559 | { |
568 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 560 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
569 | 561 | ||
570 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 562 | if (ext4_has_metadata_csum(dir->i_sb)) |
571 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
572 | entry_space -= sizeof(struct dx_tail); | 563 | entry_space -= sizeof(struct dx_tail); |
573 | return entry_space / sizeof(struct dx_entry); | 564 | return entry_space / sizeof(struct dx_entry); |
574 | } | 565 | } |
@@ -641,7 +632,9 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
641 | u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; | 632 | u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; |
642 | struct stats stats; | 633 | struct stats stats; |
643 | printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); | 634 | printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); |
644 | if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; | 635 | bh = ext4_bread(NULL,dir, block, 0); |
636 | if (!bh || IS_ERR(bh)) | ||
637 | continue; | ||
645 | stats = levels? | 638 | stats = levels? |
646 | dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): | 639 | dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): |
647 | dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); | 640 | dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); |
@@ -669,29 +662,25 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
669 | */ | 662 | */ |
670 | static struct dx_frame * | 663 | static struct dx_frame * |
671 | dx_probe(const struct qstr *d_name, struct inode *dir, | 664 | dx_probe(const struct qstr *d_name, struct inode *dir, |
672 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 665 | struct dx_hash_info *hinfo, struct dx_frame *frame_in) |
673 | { | 666 | { |
674 | unsigned count, indirect; | 667 | unsigned count, indirect; |
675 | struct dx_entry *at, *entries, *p, *q, *m; | 668 | struct dx_entry *at, *entries, *p, *q, *m; |
676 | struct dx_root *root; | 669 | struct dx_root *root; |
677 | struct buffer_head *bh; | ||
678 | struct dx_frame *frame = frame_in; | 670 | struct dx_frame *frame = frame_in; |
671 | struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); | ||
679 | u32 hash; | 672 | u32 hash; |
680 | 673 | ||
681 | frame->bh = NULL; | 674 | frame->bh = ext4_read_dirblock(dir, 0, INDEX); |
682 | bh = ext4_read_dirblock(dir, 0, INDEX); | 675 | if (IS_ERR(frame->bh)) |
683 | if (IS_ERR(bh)) { | 676 | return (struct dx_frame *) frame->bh; |
684 | *err = PTR_ERR(bh); | 677 | |
685 | goto fail; | 678 | root = (struct dx_root *) frame->bh->b_data; |
686 | } | ||
687 | root = (struct dx_root *) bh->b_data; | ||
688 | if (root->info.hash_version != DX_HASH_TEA && | 679 | if (root->info.hash_version != DX_HASH_TEA && |
689 | root->info.hash_version != DX_HASH_HALF_MD4 && | 680 | root->info.hash_version != DX_HASH_HALF_MD4 && |
690 | root->info.hash_version != DX_HASH_LEGACY) { | 681 | root->info.hash_version != DX_HASH_LEGACY) { |
691 | ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", | 682 | ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", |
692 | root->info.hash_version); | 683 | root->info.hash_version); |
693 | brelse(bh); | ||
694 | *err = ERR_BAD_DX_DIR; | ||
695 | goto fail; | 684 | goto fail; |
696 | } | 685 | } |
697 | hinfo->hash_version = root->info.hash_version; | 686 | hinfo->hash_version = root->info.hash_version; |
@@ -705,16 +694,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
705 | if (root->info.unused_flags & 1) { | 694 | if (root->info.unused_flags & 1) { |
706 | ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", | 695 | ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", |
707 | root->info.unused_flags); | 696 | root->info.unused_flags); |
708 | brelse(bh); | ||
709 | *err = ERR_BAD_DX_DIR; | ||
710 | goto fail; | 697 | goto fail; |
711 | } | 698 | } |
712 | 699 | ||
713 | if ((indirect = root->info.indirect_levels) > 1) { | 700 | if ((indirect = root->info.indirect_levels) > 1) { |
714 | ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", | 701 | ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", |
715 | root->info.indirect_levels); | 702 | root->info.indirect_levels); |
716 | brelse(bh); | ||
717 | *err = ERR_BAD_DX_DIR; | ||
718 | goto fail; | 703 | goto fail; |
719 | } | 704 | } |
720 | 705 | ||
@@ -724,27 +709,21 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
724 | if (dx_get_limit(entries) != dx_root_limit(dir, | 709 | if (dx_get_limit(entries) != dx_root_limit(dir, |
725 | root->info.info_length)) { | 710 | root->info.info_length)) { |
726 | ext4_warning(dir->i_sb, "dx entry: limit != root limit"); | 711 | ext4_warning(dir->i_sb, "dx entry: limit != root limit"); |
727 | brelse(bh); | ||
728 | *err = ERR_BAD_DX_DIR; | ||
729 | goto fail; | 712 | goto fail; |
730 | } | 713 | } |
731 | 714 | ||
732 | dxtrace(printk("Look up %x", hash)); | 715 | dxtrace(printk("Look up %x", hash)); |
733 | while (1) | 716 | while (1) { |
734 | { | ||
735 | count = dx_get_count(entries); | 717 | count = dx_get_count(entries); |
736 | if (!count || count > dx_get_limit(entries)) { | 718 | if (!count || count > dx_get_limit(entries)) { |
737 | ext4_warning(dir->i_sb, | 719 | ext4_warning(dir->i_sb, |
738 | "dx entry: no count or count > limit"); | 720 | "dx entry: no count or count > limit"); |
739 | brelse(bh); | 721 | goto fail; |
740 | *err = ERR_BAD_DX_DIR; | ||
741 | goto fail2; | ||
742 | } | 722 | } |
743 | 723 | ||
744 | p = entries + 1; | 724 | p = entries + 1; |
745 | q = entries + count - 1; | 725 | q = entries + count - 1; |
746 | while (p <= q) | 726 | while (p <= q) { |
747 | { | ||
748 | m = p + (q - p)/2; | 727 | m = p + (q - p)/2; |
749 | dxtrace(printk(".")); | 728 | dxtrace(printk(".")); |
750 | if (dx_get_hash(m) > hash) | 729 | if (dx_get_hash(m) > hash) |
@@ -753,8 +732,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
753 | p = m + 1; | 732 | p = m + 1; |
754 | } | 733 | } |
755 | 734 | ||
756 | if (0) // linear search cross check | 735 | if (0) { // linear search cross check |
757 | { | ||
758 | unsigned n = count - 1; | 736 | unsigned n = count - 1; |
759 | at = entries; | 737 | at = entries; |
760 | while (n--) | 738 | while (n--) |
@@ -771,38 +749,35 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
771 | 749 | ||
772 | at = p - 1; | 750 | at = p - 1; |
773 | dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); | 751 | dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); |
774 | frame->bh = bh; | ||
775 | frame->entries = entries; | 752 | frame->entries = entries; |
776 | frame->at = at; | 753 | frame->at = at; |
777 | if (!indirect--) return frame; | 754 | if (!indirect--) |
778 | bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); | 755 | return frame; |
779 | if (IS_ERR(bh)) { | 756 | frame++; |
780 | *err = PTR_ERR(bh); | 757 | frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); |
781 | goto fail2; | 758 | if (IS_ERR(frame->bh)) { |
759 | ret_err = (struct dx_frame *) frame->bh; | ||
760 | frame->bh = NULL; | ||
761 | goto fail; | ||
782 | } | 762 | } |
783 | entries = ((struct dx_node *) bh->b_data)->entries; | 763 | entries = ((struct dx_node *) frame->bh->b_data)->entries; |
784 | 764 | ||
785 | if (dx_get_limit(entries) != dx_node_limit (dir)) { | 765 | if (dx_get_limit(entries) != dx_node_limit (dir)) { |
786 | ext4_warning(dir->i_sb, | 766 | ext4_warning(dir->i_sb, |
787 | "dx entry: limit != node limit"); | 767 | "dx entry: limit != node limit"); |
788 | brelse(bh); | 768 | goto fail; |
789 | *err = ERR_BAD_DX_DIR; | ||
790 | goto fail2; | ||
791 | } | 769 | } |
792 | frame++; | ||
793 | frame->bh = NULL; | ||
794 | } | 770 | } |
795 | fail2: | 771 | fail: |
796 | while (frame >= frame_in) { | 772 | while (frame >= frame_in) { |
797 | brelse(frame->bh); | 773 | brelse(frame->bh); |
798 | frame--; | 774 | frame--; |
799 | } | 775 | } |
800 | fail: | 776 | if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) |
801 | if (*err == ERR_BAD_DX_DIR) | ||
802 | ext4_warning(dir->i_sb, | 777 | ext4_warning(dir->i_sb, |
803 | "Corrupt dir inode %lu, running e2fsck is " | 778 | "Corrupt dir inode %lu, running e2fsck is " |
804 | "recommended.", dir->i_ino); | 779 | "recommended.", dir->i_ino); |
805 | return NULL; | 780 | return ret_err; |
806 | } | 781 | } |
807 | 782 | ||
808 | static void dx_release (struct dx_frame *frames) | 783 | static void dx_release (struct dx_frame *frames) |
@@ -988,9 +963,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
988 | } | 963 | } |
989 | hinfo.hash = start_hash; | 964 | hinfo.hash = start_hash; |
990 | hinfo.minor_hash = 0; | 965 | hinfo.minor_hash = 0; |
991 | frame = dx_probe(NULL, dir, &hinfo, frames, &err); | 966 | frame = dx_probe(NULL, dir, &hinfo, frames); |
992 | if (!frame) | 967 | if (IS_ERR(frame)) |
993 | return err; | 968 | return PTR_ERR(frame); |
994 | 969 | ||
995 | /* Add '.' and '..' from the htree header */ | 970 | /* Add '.' and '..' from the htree header */ |
996 | if (!start_hash && !start_minor_hash) { | 971 | if (!start_hash && !start_minor_hash) { |
@@ -1227,8 +1202,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
1227 | buffer */ | 1202 | buffer */ |
1228 | int num = 0; | 1203 | int num = 0; |
1229 | ext4_lblk_t nblocks; | 1204 | ext4_lblk_t nblocks; |
1230 | int i, err = 0; | 1205 | int i, namelen; |
1231 | int namelen; | ||
1232 | 1206 | ||
1233 | *res_dir = NULL; | 1207 | *res_dir = NULL; |
1234 | sb = dir->i_sb; | 1208 | sb = dir->i_sb; |
@@ -1258,17 +1232,13 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
1258 | goto restart; | 1232 | goto restart; |
1259 | } | 1233 | } |
1260 | if (is_dx(dir)) { | 1234 | if (is_dx(dir)) { |
1261 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); | 1235 | bh = ext4_dx_find_entry(dir, d_name, res_dir); |
1262 | /* | 1236 | /* |
1263 | * On success, or if the error was file not found, | 1237 | * On success, or if the error was file not found, |
1264 | * return. Otherwise, fall back to doing a search the | 1238 | * return. Otherwise, fall back to doing a search the |
1265 | * old fashioned way. | 1239 | * old fashioned way. |
1266 | */ | 1240 | */ |
1267 | if (err == -ENOENT) | 1241 | if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR) |
1268 | return NULL; | ||
1269 | if (err && err != ERR_BAD_DX_DIR) | ||
1270 | return ERR_PTR(err); | ||
1271 | if (bh) | ||
1272 | return bh; | 1242 | return bh; |
1273 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " | 1243 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
1274 | "falling back\n")); | 1244 | "falling back\n")); |
@@ -1298,10 +1268,10 @@ restart: | |||
1298 | break; | 1268 | break; |
1299 | } | 1269 | } |
1300 | num++; | 1270 | num++; |
1301 | bh = ext4_getblk(NULL, dir, b++, 0, &err); | 1271 | bh = ext4_getblk(NULL, dir, b++, 0); |
1302 | if (unlikely(err)) { | 1272 | if (unlikely(IS_ERR(bh))) { |
1303 | if (ra_max == 0) | 1273 | if (ra_max == 0) |
1304 | return ERR_PTR(err); | 1274 | return bh; |
1305 | break; | 1275 | break; |
1306 | } | 1276 | } |
1307 | bh_use[ra_max] = bh; | 1277 | bh_use[ra_max] = bh; |
@@ -1366,7 +1336,7 @@ cleanup_and_exit: | |||
1366 | } | 1336 | } |
1367 | 1337 | ||
1368 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, | 1338 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
1369 | struct ext4_dir_entry_2 **res_dir, int *err) | 1339 | struct ext4_dir_entry_2 **res_dir) |
1370 | { | 1340 | { |
1371 | struct super_block * sb = dir->i_sb; | 1341 | struct super_block * sb = dir->i_sb; |
1372 | struct dx_hash_info hinfo; | 1342 | struct dx_hash_info hinfo; |
@@ -1375,25 +1345,23 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1375 | ext4_lblk_t block; | 1345 | ext4_lblk_t block; |
1376 | int retval; | 1346 | int retval; |
1377 | 1347 | ||
1378 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) | 1348 | frame = dx_probe(d_name, dir, &hinfo, frames); |
1379 | return NULL; | 1349 | if (IS_ERR(frame)) |
1350 | return (struct buffer_head *) frame; | ||
1380 | do { | 1351 | do { |
1381 | block = dx_get_block(frame->at); | 1352 | block = dx_get_block(frame->at); |
1382 | bh = ext4_read_dirblock(dir, block, DIRENT); | 1353 | bh = ext4_read_dirblock(dir, block, DIRENT); |
1383 | if (IS_ERR(bh)) { | 1354 | if (IS_ERR(bh)) |
1384 | *err = PTR_ERR(bh); | ||
1385 | goto errout; | 1355 | goto errout; |
1386 | } | 1356 | |
1387 | retval = search_dirblock(bh, dir, d_name, | 1357 | retval = search_dirblock(bh, dir, d_name, |
1388 | block << EXT4_BLOCK_SIZE_BITS(sb), | 1358 | block << EXT4_BLOCK_SIZE_BITS(sb), |
1389 | res_dir); | 1359 | res_dir); |
1390 | if (retval == 1) { /* Success! */ | 1360 | if (retval == 1) |
1391 | dx_release(frames); | 1361 | goto success; |
1392 | return bh; | ||
1393 | } | ||
1394 | brelse(bh); | 1362 | brelse(bh); |
1395 | if (retval == -1) { | 1363 | if (retval == -1) { |
1396 | *err = ERR_BAD_DX_DIR; | 1364 | bh = ERR_PTR(ERR_BAD_DX_DIR); |
1397 | goto errout; | 1365 | goto errout; |
1398 | } | 1366 | } |
1399 | 1367 | ||
@@ -1402,18 +1370,19 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1402 | frames, NULL); | 1370 | frames, NULL); |
1403 | if (retval < 0) { | 1371 | if (retval < 0) { |
1404 | ext4_warning(sb, | 1372 | ext4_warning(sb, |
1405 | "error reading index page in directory #%lu", | 1373 | "error %d reading index page in directory #%lu", |
1406 | dir->i_ino); | 1374 | retval, dir->i_ino); |
1407 | *err = retval; | 1375 | bh = ERR_PTR(retval); |
1408 | goto errout; | 1376 | goto errout; |
1409 | } | 1377 | } |
1410 | } while (retval == 1); | 1378 | } while (retval == 1); |
1411 | 1379 | ||
1412 | *err = -ENOENT; | 1380 | bh = NULL; |
1413 | errout: | 1381 | errout: |
1414 | dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); | 1382 | dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); |
1415 | dx_release (frames); | 1383 | success: |
1416 | return NULL; | 1384 | dx_release(frames); |
1385 | return bh; | ||
1417 | } | 1386 | } |
1418 | 1387 | ||
1419 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 1388 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
@@ -1441,7 +1410,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi | |||
1441 | dentry); | 1410 | dentry); |
1442 | return ERR_PTR(-EIO); | 1411 | return ERR_PTR(-EIO); |
1443 | } | 1412 | } |
1444 | inode = ext4_iget(dir->i_sb, ino); | 1413 | inode = ext4_iget_normal(dir->i_sb, ino); |
1445 | if (inode == ERR_PTR(-ESTALE)) { | 1414 | if (inode == ERR_PTR(-ESTALE)) { |
1446 | EXT4_ERROR_INODE(dir, | 1415 | EXT4_ERROR_INODE(dir, |
1447 | "deleted inode referenced: %u", | 1416 | "deleted inode referenced: %u", |
@@ -1474,7 +1443,7 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1474 | return ERR_PTR(-EIO); | 1443 | return ERR_PTR(-EIO); |
1475 | } | 1444 | } |
1476 | 1445 | ||
1477 | return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); | 1446 | return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino)); |
1478 | } | 1447 | } |
1479 | 1448 | ||
1480 | /* | 1449 | /* |
@@ -1533,7 +1502,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) | |||
1533 | */ | 1502 | */ |
1534 | static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | 1503 | static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, |
1535 | struct buffer_head **bh,struct dx_frame *frame, | 1504 | struct buffer_head **bh,struct dx_frame *frame, |
1536 | struct dx_hash_info *hinfo, int *error) | 1505 | struct dx_hash_info *hinfo) |
1537 | { | 1506 | { |
1538 | unsigned blocksize = dir->i_sb->s_blocksize; | 1507 | unsigned blocksize = dir->i_sb->s_blocksize; |
1539 | unsigned count, continued; | 1508 | unsigned count, continued; |
@@ -1548,16 +1517,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1548 | int csum_size = 0; | 1517 | int csum_size = 0; |
1549 | int err = 0, i; | 1518 | int err = 0, i; |
1550 | 1519 | ||
1551 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 1520 | if (ext4_has_metadata_csum(dir->i_sb)) |
1552 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1553 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1521 | csum_size = sizeof(struct ext4_dir_entry_tail); |
1554 | 1522 | ||
1555 | bh2 = ext4_append(handle, dir, &newblock); | 1523 | bh2 = ext4_append(handle, dir, &newblock); |
1556 | if (IS_ERR(bh2)) { | 1524 | if (IS_ERR(bh2)) { |
1557 | brelse(*bh); | 1525 | brelse(*bh); |
1558 | *bh = NULL; | 1526 | *bh = NULL; |
1559 | *error = PTR_ERR(bh2); | 1527 | return (struct ext4_dir_entry_2 *) bh2; |
1560 | return NULL; | ||
1561 | } | 1528 | } |
1562 | 1529 | ||
1563 | BUFFER_TRACE(*bh, "get_write_access"); | 1530 | BUFFER_TRACE(*bh, "get_write_access"); |
@@ -1617,8 +1584,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1617 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); | 1584 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); |
1618 | 1585 | ||
1619 | /* Which block gets the new entry? */ | 1586 | /* Which block gets the new entry? */ |
1620 | if (hinfo->hash >= hash2) | 1587 | if (hinfo->hash >= hash2) { |
1621 | { | ||
1622 | swap(*bh, bh2); | 1588 | swap(*bh, bh2); |
1623 | de = de2; | 1589 | de = de2; |
1624 | } | 1590 | } |
@@ -1638,8 +1604,7 @@ journal_error: | |||
1638 | brelse(bh2); | 1604 | brelse(bh2); |
1639 | *bh = NULL; | 1605 | *bh = NULL; |
1640 | ext4_std_error(dir->i_sb, err); | 1606 | ext4_std_error(dir->i_sb, err); |
1641 | *error = err; | 1607 | return ERR_PTR(err); |
1642 | return NULL; | ||
1643 | } | 1608 | } |
1644 | 1609 | ||
1645 | int ext4_find_dest_de(struct inode *dir, struct inode *inode, | 1610 | int ext4_find_dest_de(struct inode *dir, struct inode *inode, |
@@ -1718,8 +1683,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1718 | int csum_size = 0; | 1683 | int csum_size = 0; |
1719 | int err; | 1684 | int err; |
1720 | 1685 | ||
1721 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1686 | if (ext4_has_metadata_csum(inode->i_sb)) |
1722 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1723 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1687 | csum_size = sizeof(struct ext4_dir_entry_tail); |
1724 | 1688 | ||
1725 | if (!de) { | 1689 | if (!de) { |
@@ -1786,8 +1750,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1786 | struct fake_dirent *fde; | 1750 | struct fake_dirent *fde; |
1787 | int csum_size = 0; | 1751 | int csum_size = 0; |
1788 | 1752 | ||
1789 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1753 | if (ext4_has_metadata_csum(inode->i_sb)) |
1790 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1791 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1754 | csum_size = sizeof(struct ext4_dir_entry_tail); |
1792 | 1755 | ||
1793 | blocksize = dir->i_sb->s_blocksize; | 1756 | blocksize = dir->i_sb->s_blocksize; |
@@ -1853,31 +1816,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1853 | hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; | 1816 | hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; |
1854 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 1817 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
1855 | ext4fs_dirhash(name, namelen, &hinfo); | 1818 | ext4fs_dirhash(name, namelen, &hinfo); |
1819 | memset(frames, 0, sizeof(frames)); | ||
1856 | frame = frames; | 1820 | frame = frames; |
1857 | frame->entries = entries; | 1821 | frame->entries = entries; |
1858 | frame->at = entries; | 1822 | frame->at = entries; |
1859 | frame->bh = bh; | 1823 | frame->bh = bh; |
1860 | bh = bh2; | 1824 | bh = bh2; |
1861 | 1825 | ||
1862 | ext4_handle_dirty_dx_node(handle, dir, frame->bh); | 1826 | retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); |
1863 | ext4_handle_dirty_dirent_node(handle, dir, bh); | 1827 | if (retval) |
1828 | goto out_frames; | ||
1829 | retval = ext4_handle_dirty_dirent_node(handle, dir, bh); | ||
1830 | if (retval) | ||
1831 | goto out_frames; | ||
1864 | 1832 | ||
1865 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); | 1833 | de = do_split(handle,dir, &bh, frame, &hinfo); |
1866 | if (!de) { | 1834 | if (IS_ERR(de)) { |
1867 | /* | 1835 | retval = PTR_ERR(de); |
1868 | * Even if the block split failed, we have to properly write | 1836 | goto out_frames; |
1869 | * out all the changes we did so far. Otherwise we can end up | ||
1870 | * with corrupted filesystem. | ||
1871 | */ | ||
1872 | ext4_mark_inode_dirty(handle, dir); | ||
1873 | dx_release(frames); | ||
1874 | return retval; | ||
1875 | } | 1837 | } |
1876 | dx_release(frames); | 1838 | dx_release(frames); |
1877 | 1839 | ||
1878 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1840 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1879 | brelse(bh); | 1841 | brelse(bh); |
1880 | return retval; | 1842 | return retval; |
1843 | out_frames: | ||
1844 | /* | ||
1845 | * Even if the block split failed, we have to properly write | ||
1846 | * out all the changes we did so far. Otherwise we can end up | ||
1847 | * with corrupted filesystem. | ||
1848 | */ | ||
1849 | ext4_mark_inode_dirty(handle, dir); | ||
1850 | dx_release(frames); | ||
1851 | return retval; | ||
1881 | } | 1852 | } |
1882 | 1853 | ||
1883 | /* | 1854 | /* |
@@ -1904,8 +1875,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1904 | ext4_lblk_t block, blocks; | 1875 | ext4_lblk_t block, blocks; |
1905 | int csum_size = 0; | 1876 | int csum_size = 0; |
1906 | 1877 | ||
1907 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1878 | if (ext4_has_metadata_csum(inode->i_sb)) |
1908 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1909 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1879 | csum_size = sizeof(struct ext4_dir_entry_tail); |
1910 | 1880 | ||
1911 | sb = dir->i_sb; | 1881 | sb = dir->i_sb; |
@@ -1982,9 +1952,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1982 | struct ext4_dir_entry_2 *de; | 1952 | struct ext4_dir_entry_2 *de; |
1983 | int err; | 1953 | int err; |
1984 | 1954 | ||
1985 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); | 1955 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames); |
1986 | if (!frame) | 1956 | if (IS_ERR(frame)) |
1987 | return err; | 1957 | return PTR_ERR(frame); |
1988 | entries = frame->entries; | 1958 | entries = frame->entries; |
1989 | at = frame->at; | 1959 | at = frame->at; |
1990 | bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); | 1960 | bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); |
@@ -2095,9 +2065,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
2095 | goto cleanup; | 2065 | goto cleanup; |
2096 | } | 2066 | } |
2097 | } | 2067 | } |
2098 | de = do_split(handle, dir, &bh, frame, &hinfo, &err); | 2068 | de = do_split(handle, dir, &bh, frame, &hinfo); |
2099 | if (!de) | 2069 | if (IS_ERR(de)) { |
2070 | err = PTR_ERR(de); | ||
2100 | goto cleanup; | 2071 | goto cleanup; |
2072 | } | ||
2101 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); | 2073 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); |
2102 | goto cleanup; | 2074 | goto cleanup; |
2103 | 2075 | ||
@@ -2167,8 +2139,7 @@ static int ext4_delete_entry(handle_t *handle, | |||
2167 | return err; | 2139 | return err; |
2168 | } | 2140 | } |
2169 | 2141 | ||
2170 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 2142 | if (ext4_has_metadata_csum(dir->i_sb)) |
2171 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
2172 | csum_size = sizeof(struct ext4_dir_entry_tail); | 2143 | csum_size = sizeof(struct ext4_dir_entry_tail); |
2173 | 2144 | ||
2174 | BUFFER_TRACE(bh, "get_write_access"); | 2145 | BUFFER_TRACE(bh, "get_write_access"); |
@@ -2387,8 +2358,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, | |||
2387 | int csum_size = 0; | 2358 | int csum_size = 0; |
2388 | int err; | 2359 | int err; |
2389 | 2360 | ||
2390 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 2361 | if (ext4_has_metadata_csum(dir->i_sb)) |
2391 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
2392 | csum_size = sizeof(struct ext4_dir_entry_tail); | 2362 | csum_size = sizeof(struct ext4_dir_entry_tail); |
2393 | 2363 | ||
2394 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { | 2364 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { |
@@ -2403,10 +2373,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, | |||
2403 | dir_block = ext4_append(handle, inode, &block); | 2373 | dir_block = ext4_append(handle, inode, &block); |
2404 | if (IS_ERR(dir_block)) | 2374 | if (IS_ERR(dir_block)) |
2405 | return PTR_ERR(dir_block); | 2375 | return PTR_ERR(dir_block); |
2406 | BUFFER_TRACE(dir_block, "get_write_access"); | ||
2407 | err = ext4_journal_get_write_access(handle, dir_block); | ||
2408 | if (err) | ||
2409 | goto out; | ||
2410 | de = (struct ext4_dir_entry_2 *)dir_block->b_data; | 2376 | de = (struct ext4_dir_entry_2 *)dir_block->b_data; |
2411 | ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); | 2377 | ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); |
2412 | set_nlink(inode, 2); | 2378 | set_nlink(inode, 2); |
@@ -2573,7 +2539,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2573 | int err = 0, rc; | 2539 | int err = 0, rc; |
2574 | bool dirty = false; | 2540 | bool dirty = false; |
2575 | 2541 | ||
2576 | if (!sbi->s_journal) | 2542 | if (!sbi->s_journal || is_bad_inode(inode)) |
2577 | return 0; | 2543 | return 0; |
2578 | 2544 | ||
2579 | WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && | 2545 | WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && |
@@ -3190,6 +3156,39 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
3190 | } | 3156 | } |
3191 | } | 3157 | } |
3192 | 3158 | ||
3159 | static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent, | ||
3160 | int credits, handle_t **h) | ||
3161 | { | ||
3162 | struct inode *wh; | ||
3163 | handle_t *handle; | ||
3164 | int retries = 0; | ||
3165 | |||
3166 | /* | ||
3167 | * for inode block, sb block, group summaries, | ||
3168 | * and inode bitmap | ||
3169 | */ | ||
3170 | credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) + | ||
3171 | EXT4_XATTR_TRANS_BLOCKS + 4); | ||
3172 | retry: | ||
3173 | wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE, | ||
3174 | &ent->dentry->d_name, 0, NULL, | ||
3175 | EXT4_HT_DIR, credits); | ||
3176 | |||
3177 | handle = ext4_journal_current_handle(); | ||
3178 | if (IS_ERR(wh)) { | ||
3179 | if (handle) | ||
3180 | ext4_journal_stop(handle); | ||
3181 | if (PTR_ERR(wh) == -ENOSPC && | ||
3182 | ext4_should_retry_alloc(ent->dir->i_sb, &retries)) | ||
3183 | goto retry; | ||
3184 | } else { | ||
3185 | *h = handle; | ||
3186 | init_special_inode(wh, wh->i_mode, WHITEOUT_DEV); | ||
3187 | wh->i_op = &ext4_special_inode_operations; | ||
3188 | } | ||
3189 | return wh; | ||
3190 | } | ||
3191 | |||
3193 | /* | 3192 | /* |
3194 | * Anybody can rename anything with this: the permission checks are left to the | 3193 | * Anybody can rename anything with this: the permission checks are left to the |
3195 | * higher-level routines. | 3194 | * higher-level routines. |
@@ -3199,7 +3198,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
3199 | * This comes from rename(const char *oldpath, const char *newpath) | 3198 | * This comes from rename(const char *oldpath, const char *newpath) |
3200 | */ | 3199 | */ |
3201 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | 3200 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
3202 | struct inode *new_dir, struct dentry *new_dentry) | 3201 | struct inode *new_dir, struct dentry *new_dentry, |
3202 | unsigned int flags) | ||
3203 | { | 3203 | { |
3204 | handle_t *handle = NULL; | 3204 | handle_t *handle = NULL; |
3205 | struct ext4_renament old = { | 3205 | struct ext4_renament old = { |
@@ -3214,6 +3214,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3214 | }; | 3214 | }; |
3215 | int force_reread; | 3215 | int force_reread; |
3216 | int retval; | 3216 | int retval; |
3217 | struct inode *whiteout = NULL; | ||
3218 | int credits; | ||
3219 | u8 old_file_type; | ||
3217 | 3220 | ||
3218 | dquot_initialize(old.dir); | 3221 | dquot_initialize(old.dir); |
3219 | dquot_initialize(new.dir); | 3222 | dquot_initialize(new.dir); |
@@ -3252,11 +3255,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3252 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) | 3255 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) |
3253 | ext4_alloc_da_blocks(old.inode); | 3256 | ext4_alloc_da_blocks(old.inode); |
3254 | 3257 | ||
3255 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, | 3258 | credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + |
3256 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + | 3259 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); |
3257 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | 3260 | if (!(flags & RENAME_WHITEOUT)) { |
3258 | if (IS_ERR(handle)) | 3261 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); |
3259 | return PTR_ERR(handle); | 3262 | if (IS_ERR(handle)) |
3263 | return PTR_ERR(handle); | ||
3264 | } else { | ||
3265 | whiteout = ext4_whiteout_for_rename(&old, credits, &handle); | ||
3266 | if (IS_ERR(whiteout)) | ||
3267 | return PTR_ERR(whiteout); | ||
3268 | } | ||
3260 | 3269 | ||
3261 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | 3270 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
3262 | ext4_handle_sync(handle); | 3271 | ext4_handle_sync(handle); |
@@ -3284,13 +3293,26 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3284 | */ | 3293 | */ |
3285 | force_reread = (new.dir->i_ino == old.dir->i_ino && | 3294 | force_reread = (new.dir->i_ino == old.dir->i_ino && |
3286 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); | 3295 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); |
3296 | |||
3297 | old_file_type = old.de->file_type; | ||
3298 | if (whiteout) { | ||
3299 | /* | ||
3300 | * Do this before adding a new entry, so the old entry is sure | ||
3301 | * to be still pointing to the valid old entry. | ||
3302 | */ | ||
3303 | retval = ext4_setent(handle, &old, whiteout->i_ino, | ||
3304 | EXT4_FT_CHRDEV); | ||
3305 | if (retval) | ||
3306 | goto end_rename; | ||
3307 | ext4_mark_inode_dirty(handle, whiteout); | ||
3308 | } | ||
3287 | if (!new.bh) { | 3309 | if (!new.bh) { |
3288 | retval = ext4_add_entry(handle, new.dentry, old.inode); | 3310 | retval = ext4_add_entry(handle, new.dentry, old.inode); |
3289 | if (retval) | 3311 | if (retval) |
3290 | goto end_rename; | 3312 | goto end_rename; |
3291 | } else { | 3313 | } else { |
3292 | retval = ext4_setent(handle, &new, | 3314 | retval = ext4_setent(handle, &new, |
3293 | old.inode->i_ino, old.de->file_type); | 3315 | old.inode->i_ino, old_file_type); |
3294 | if (retval) | 3316 | if (retval) |
3295 | goto end_rename; | 3317 | goto end_rename; |
3296 | } | 3318 | } |
@@ -3305,10 +3327,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3305 | old.inode->i_ctime = ext4_current_time(old.inode); | 3327 | old.inode->i_ctime = ext4_current_time(old.inode); |
3306 | ext4_mark_inode_dirty(handle, old.inode); | 3328 | ext4_mark_inode_dirty(handle, old.inode); |
3307 | 3329 | ||
3308 | /* | 3330 | if (!whiteout) { |
3309 | * ok, that's it | 3331 | /* |
3310 | */ | 3332 | * ok, that's it |
3311 | ext4_rename_delete(handle, &old, force_reread); | 3333 | */ |
3334 | ext4_rename_delete(handle, &old, force_reread); | ||
3335 | } | ||
3312 | 3336 | ||
3313 | if (new.inode) { | 3337 | if (new.inode) { |
3314 | ext4_dec_count(handle, new.inode); | 3338 | ext4_dec_count(handle, new.inode); |
@@ -3344,6 +3368,12 @@ end_rename: | |||
3344 | brelse(old.dir_bh); | 3368 | brelse(old.dir_bh); |
3345 | brelse(old.bh); | 3369 | brelse(old.bh); |
3346 | brelse(new.bh); | 3370 | brelse(new.bh); |
3371 | if (whiteout) { | ||
3372 | if (retval) | ||
3373 | drop_nlink(whiteout); | ||
3374 | unlock_new_inode(whiteout); | ||
3375 | iput(whiteout); | ||
3376 | } | ||
3347 | if (handle) | 3377 | if (handle) |
3348 | ext4_journal_stop(handle); | 3378 | ext4_journal_stop(handle); |
3349 | return retval; | 3379 | return retval; |
@@ -3476,18 +3506,15 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
3476 | struct inode *new_dir, struct dentry *new_dentry, | 3506 | struct inode *new_dir, struct dentry *new_dentry, |
3477 | unsigned int flags) | 3507 | unsigned int flags) |
3478 | { | 3508 | { |
3479 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 3509 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
3480 | return -EINVAL; | 3510 | return -EINVAL; |
3481 | 3511 | ||
3482 | if (flags & RENAME_EXCHANGE) { | 3512 | if (flags & RENAME_EXCHANGE) { |
3483 | return ext4_cross_rename(old_dir, old_dentry, | 3513 | return ext4_cross_rename(old_dir, old_dentry, |
3484 | new_dir, new_dentry); | 3514 | new_dir, new_dentry); |
3485 | } | 3515 | } |
3486 | /* | 3516 | |
3487 | * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" | 3517 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags); |
3488 | * is equivalent to regular rename. | ||
3489 | */ | ||
3490 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
3491 | } | 3518 | } |
3492 | 3519 | ||
3493 | /* | 3520 | /* |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1e43b905ff98..ca4588388fc3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -1081,7 +1081,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, | |||
1081 | break; | 1081 | break; |
1082 | 1082 | ||
1083 | if (meta_bg == 0) | 1083 | if (meta_bg == 0) |
1084 | backup_block = group * bpg + blk_off; | 1084 | backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; |
1085 | else | 1085 | else |
1086 | backup_block = (ext4_group_first_block_no(sb, group) + | 1086 | backup_block = (ext4_group_first_block_no(sb, group) + |
1087 | ext4_bg_has_super(sb, group)); | 1087 | ext4_bg_has_super(sb, group)); |
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb, | |||
1212 | { | 1212 | { |
1213 | struct buffer_head *bh; | 1213 | struct buffer_head *bh; |
1214 | 1214 | ||
1215 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1215 | if (!ext4_has_metadata_csum(sb)) |
1216 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1217 | return 0; | 1216 | return 0; |
1218 | 1217 | ||
1219 | bh = ext4_get_bitmap(sb, group_data->inode_bitmap); | 1218 | bh = ext4_get_bitmap(sb, group_data->inode_bitmap); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 05c159218bc2..2c9e6864abd9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
70 | static void ext4_clear_journal_err(struct super_block *sb, | 70 | static void ext4_clear_journal_err(struct super_block *sb, |
71 | struct ext4_super_block *es); | 71 | struct ext4_super_block *es); |
72 | static int ext4_sync_fs(struct super_block *sb, int wait); | 72 | static int ext4_sync_fs(struct super_block *sb, int wait); |
73 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); | ||
74 | static int ext4_remount(struct super_block *sb, int *flags, char *data); | 73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); |
75 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | 74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); |
76 | static int ext4_unfreeze(struct super_block *sb); | 75 | static int ext4_unfreeze(struct super_block *sb); |
@@ -141,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb, | |||
141 | static int ext4_superblock_csum_verify(struct super_block *sb, | 140 | static int ext4_superblock_csum_verify(struct super_block *sb, |
142 | struct ext4_super_block *es) | 141 | struct ext4_super_block *es) |
143 | { | 142 | { |
144 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 143 | if (!ext4_has_metadata_csum(sb)) |
145 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
146 | return 1; | 144 | return 1; |
147 | 145 | ||
148 | return es->s_checksum == ext4_superblock_csum(sb, es); | 146 | return es->s_checksum == ext4_superblock_csum(sb, es); |
@@ -152,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb) | |||
152 | { | 150 | { |
153 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 151 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
154 | 152 | ||
155 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 153 | if (!ext4_has_metadata_csum(sb)) |
156 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
157 | return; | 154 | return; |
158 | 155 | ||
159 | es->s_checksum = ext4_superblock_csum(sb, es); | 156 | es->s_checksum = ext4_superblock_csum(sb, es); |
@@ -820,10 +817,9 @@ static void ext4_put_super(struct super_block *sb) | |||
820 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 817 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
821 | percpu_counter_destroy(&sbi->s_dirs_counter); | 818 | percpu_counter_destroy(&sbi->s_dirs_counter); |
822 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); | 819 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
823 | percpu_counter_destroy(&sbi->s_extent_cache_cnt); | ||
824 | brelse(sbi->s_sbh); | 820 | brelse(sbi->s_sbh); |
825 | #ifdef CONFIG_QUOTA | 821 | #ifdef CONFIG_QUOTA |
826 | for (i = 0; i < MAXQUOTAS; i++) | 822 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
827 | kfree(sbi->s_qf_names[i]); | 823 | kfree(sbi->s_qf_names[i]); |
828 | #endif | 824 | #endif |
829 | 825 | ||
@@ -885,6 +881,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
885 | ext4_es_init_tree(&ei->i_es_tree); | 881 | ext4_es_init_tree(&ei->i_es_tree); |
886 | rwlock_init(&ei->i_es_lock); | 882 | rwlock_init(&ei->i_es_lock); |
887 | INIT_LIST_HEAD(&ei->i_es_lru); | 883 | INIT_LIST_HEAD(&ei->i_es_lru); |
884 | ei->i_es_all_nr = 0; | ||
888 | ei->i_es_lru_nr = 0; | 885 | ei->i_es_lru_nr = 0; |
889 | ei->i_touch_when = 0; | 886 | ei->i_touch_when = 0; |
890 | ei->i_reserved_data_blocks = 0; | 887 | ei->i_reserved_data_blocks = 0; |
@@ -1002,7 +999,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, | |||
1002 | * Currently we don't know the generation for parent directory, so | 999 | * Currently we don't know the generation for parent directory, so |
1003 | * a generation of 0 means "accept any" | 1000 | * a generation of 0 means "accept any" |
1004 | */ | 1001 | */ |
1005 | inode = ext4_iget(sb, ino); | 1002 | inode = ext4_iget_normal(sb, ino); |
1006 | if (IS_ERR(inode)) | 1003 | if (IS_ERR(inode)) |
1007 | return ERR_CAST(inode); | 1004 | return ERR_CAST(inode); |
1008 | if (generation && inode->i_generation != generation) { | 1005 | if (generation && inode->i_generation != generation) { |
@@ -1124,25 +1121,6 @@ static const struct super_operations ext4_sops = { | |||
1124 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1121 | .bdev_try_to_free_page = bdev_try_to_free_page, |
1125 | }; | 1122 | }; |
1126 | 1123 | ||
1127 | static const struct super_operations ext4_nojournal_sops = { | ||
1128 | .alloc_inode = ext4_alloc_inode, | ||
1129 | .destroy_inode = ext4_destroy_inode, | ||
1130 | .write_inode = ext4_write_inode, | ||
1131 | .dirty_inode = ext4_dirty_inode, | ||
1132 | .drop_inode = ext4_drop_inode, | ||
1133 | .evict_inode = ext4_evict_inode, | ||
1134 | .sync_fs = ext4_sync_fs_nojournal, | ||
1135 | .put_super = ext4_put_super, | ||
1136 | .statfs = ext4_statfs, | ||
1137 | .remount_fs = ext4_remount, | ||
1138 | .show_options = ext4_show_options, | ||
1139 | #ifdef CONFIG_QUOTA | ||
1140 | .quota_read = ext4_quota_read, | ||
1141 | .quota_write = ext4_quota_write, | ||
1142 | #endif | ||
1143 | .bdev_try_to_free_page = bdev_try_to_free_page, | ||
1144 | }; | ||
1145 | |||
1146 | static const struct export_operations ext4_export_ops = { | 1124 | static const struct export_operations ext4_export_ops = { |
1147 | .fh_to_dentry = ext4_fh_to_dentry, | 1125 | .fh_to_dentry = ext4_fh_to_dentry, |
1148 | .fh_to_parent = ext4_fh_to_parent, | 1126 | .fh_to_parent = ext4_fh_to_parent, |
@@ -1712,13 +1690,6 @@ static int parse_options(char *options, struct super_block *sb, | |||
1712 | "not specified"); | 1690 | "not specified"); |
1713 | return 0; | 1691 | return 0; |
1714 | } | 1692 | } |
1715 | } else { | ||
1716 | if (sbi->s_jquota_fmt) { | ||
1717 | ext4_msg(sb, KERN_ERR, "journaled quota format " | ||
1718 | "specified with no journaling " | ||
1719 | "enabled"); | ||
1720 | return 0; | ||
1721 | } | ||
1722 | } | 1693 | } |
1723 | #endif | 1694 | #endif |
1724 | if (test_opt(sb, DIOREAD_NOLOCK)) { | 1695 | if (test_opt(sb, DIOREAD_NOLOCK)) { |
@@ -2016,8 +1987,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | |||
2016 | __u16 crc = 0; | 1987 | __u16 crc = 0; |
2017 | __le32 le_group = cpu_to_le32(block_group); | 1988 | __le32 le_group = cpu_to_le32(block_group); |
2018 | 1989 | ||
2019 | if ((sbi->s_es->s_feature_ro_compat & | 1990 | if (ext4_has_metadata_csum(sbi->s_sb)) { |
2020 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { | ||
2021 | /* Use new metadata_csum algorithm */ | 1991 | /* Use new metadata_csum algorithm */ |
2022 | __le16 save_csum; | 1992 | __le16 save_csum; |
2023 | __u32 csum32; | 1993 | __u32 csum32; |
@@ -2035,6 +2005,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | |||
2035 | } | 2005 | } |
2036 | 2006 | ||
2037 | /* old crc16 code */ | 2007 | /* old crc16 code */ |
2008 | if (!(sbi->s_es->s_feature_ro_compat & | ||
2009 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) | ||
2010 | return 0; | ||
2011 | |||
2038 | offset = offsetof(struct ext4_group_desc, bg_checksum); | 2012 | offset = offsetof(struct ext4_group_desc, bg_checksum); |
2039 | 2013 | ||
2040 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); | 2014 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); |
@@ -2191,7 +2165,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2191 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2165 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2192 | /* don't clear list on RO mount w/ errors */ | 2166 | /* don't clear list on RO mount w/ errors */ |
2193 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { | 2167 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { |
2194 | jbd_debug(1, "Errors on filesystem, " | 2168 | ext4_msg(sb, KERN_INFO, "Errors on filesystem, " |
2195 | "clearing orphan list.\n"); | 2169 | "clearing orphan list.\n"); |
2196 | es->s_last_orphan = 0; | 2170 | es->s_last_orphan = 0; |
2197 | } | 2171 | } |
@@ -2207,7 +2181,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2207 | /* Needed for iput() to work correctly and not trash data */ | 2181 | /* Needed for iput() to work correctly and not trash data */ |
2208 | sb->s_flags |= MS_ACTIVE; | 2182 | sb->s_flags |= MS_ACTIVE; |
2209 | /* Turn on quotas so that they are updated correctly */ | 2183 | /* Turn on quotas so that they are updated correctly */ |
2210 | for (i = 0; i < MAXQUOTAS; i++) { | 2184 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
2211 | if (EXT4_SB(sb)->s_qf_names[i]) { | 2185 | if (EXT4_SB(sb)->s_qf_names[i]) { |
2212 | int ret = ext4_quota_on_mount(sb, i); | 2186 | int ret = ext4_quota_on_mount(sb, i); |
2213 | if (ret < 0) | 2187 | if (ret < 0) |
@@ -2263,7 +2237,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2263 | PLURAL(nr_truncates)); | 2237 | PLURAL(nr_truncates)); |
2264 | #ifdef CONFIG_QUOTA | 2238 | #ifdef CONFIG_QUOTA |
2265 | /* Turn quotas off */ | 2239 | /* Turn quotas off */ |
2266 | for (i = 0; i < MAXQUOTAS; i++) { | 2240 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
2267 | if (sb_dqopt(sb)->files[i]) | 2241 | if (sb_dqopt(sb)->files[i]) |
2268 | dquot_quota_off(sb, i); | 2242 | dquot_quota_off(sb, i); |
2269 | } | 2243 | } |
@@ -2548,6 +2522,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, | |||
2548 | return count; | 2522 | return count; |
2549 | } | 2523 | } |
2550 | 2524 | ||
2525 | static ssize_t es_ui_show(struct ext4_attr *a, | ||
2526 | struct ext4_sb_info *sbi, char *buf) | ||
2527 | { | ||
2528 | |||
2529 | unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + | ||
2530 | a->u.offset); | ||
2531 | |||
2532 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | ||
2533 | } | ||
2534 | |||
2551 | static ssize_t reserved_clusters_show(struct ext4_attr *a, | 2535 | static ssize_t reserved_clusters_show(struct ext4_attr *a, |
2552 | struct ext4_sb_info *sbi, char *buf) | 2536 | struct ext4_sb_info *sbi, char *buf) |
2553 | { | 2537 | { |
@@ -2601,14 +2585,29 @@ static struct ext4_attr ext4_attr_##_name = { \ | |||
2601 | .offset = offsetof(struct ext4_sb_info, _elname),\ | 2585 | .offset = offsetof(struct ext4_sb_info, _elname),\ |
2602 | }, \ | 2586 | }, \ |
2603 | } | 2587 | } |
2588 | |||
2589 | #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ | ||
2590 | static struct ext4_attr ext4_attr_##_name = { \ | ||
2591 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | ||
2592 | .show = _show, \ | ||
2593 | .store = _store, \ | ||
2594 | .u = { \ | ||
2595 | .offset = offsetof(struct ext4_super_block, _elname), \ | ||
2596 | }, \ | ||
2597 | } | ||
2598 | |||
2604 | #define EXT4_ATTR(name, mode, show, store) \ | 2599 | #define EXT4_ATTR(name, mode, show, store) \ |
2605 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2600 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
2606 | 2601 | ||
2607 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) | 2602 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) |
2608 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) | 2603 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) |
2609 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) | 2604 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) |
2605 | |||
2606 | #define EXT4_RO_ATTR_ES_UI(name, elname) \ | ||
2607 | EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) | ||
2610 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2608 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
2611 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) | 2609 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) |
2610 | |||
2612 | #define ATTR_LIST(name) &ext4_attr_##name.attr | 2611 | #define ATTR_LIST(name) &ext4_attr_##name.attr |
2613 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ | 2612 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ |
2614 | static struct ext4_attr ext4_attr_##_name = { \ | 2613 | static struct ext4_attr ext4_attr_##_name = { \ |
@@ -2641,6 +2640,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int | |||
2641 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); | 2640 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); |
2642 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); | 2641 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); |
2643 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); | 2642 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); |
2643 | EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); | ||
2644 | EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); | ||
2645 | EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); | ||
2644 | 2646 | ||
2645 | static struct attribute *ext4_attrs[] = { | 2647 | static struct attribute *ext4_attrs[] = { |
2646 | ATTR_LIST(delayed_allocation_blocks), | 2648 | ATTR_LIST(delayed_allocation_blocks), |
@@ -2664,6 +2666,9 @@ static struct attribute *ext4_attrs[] = { | |||
2664 | ATTR_LIST(warning_ratelimit_burst), | 2666 | ATTR_LIST(warning_ratelimit_burst), |
2665 | ATTR_LIST(msg_ratelimit_interval_ms), | 2667 | ATTR_LIST(msg_ratelimit_interval_ms), |
2666 | ATTR_LIST(msg_ratelimit_burst), | 2668 | ATTR_LIST(msg_ratelimit_burst), |
2669 | ATTR_LIST(errors_count), | ||
2670 | ATTR_LIST(first_error_time), | ||
2671 | ATTR_LIST(last_error_time), | ||
2667 | NULL, | 2672 | NULL, |
2668 | }; | 2673 | }; |
2669 | 2674 | ||
@@ -2723,9 +2728,25 @@ static void ext4_feat_release(struct kobject *kobj) | |||
2723 | complete(&ext4_feat->f_kobj_unregister); | 2728 | complete(&ext4_feat->f_kobj_unregister); |
2724 | } | 2729 | } |
2725 | 2730 | ||
2731 | static ssize_t ext4_feat_show(struct kobject *kobj, | ||
2732 | struct attribute *attr, char *buf) | ||
2733 | { | ||
2734 | return snprintf(buf, PAGE_SIZE, "supported\n"); | ||
2735 | } | ||
2736 | |||
2737 | /* | ||
2738 | * We can not use ext4_attr_show/store because it relies on the kobject | ||
2739 | * being embedded in the ext4_sb_info structure which is definitely not | ||
2740 | * true in this case. | ||
2741 | */ | ||
2742 | static const struct sysfs_ops ext4_feat_ops = { | ||
2743 | .show = ext4_feat_show, | ||
2744 | .store = NULL, | ||
2745 | }; | ||
2746 | |||
2726 | static struct kobj_type ext4_feat_ktype = { | 2747 | static struct kobj_type ext4_feat_ktype = { |
2727 | .default_attrs = ext4_feat_attrs, | 2748 | .default_attrs = ext4_feat_attrs, |
2728 | .sysfs_ops = &ext4_attr_ops, | 2749 | .sysfs_ops = &ext4_feat_ops, |
2729 | .release = ext4_feat_release, | 2750 | .release = ext4_feat_release, |
2730 | }; | 2751 | }; |
2731 | 2752 | ||
@@ -3179,8 +3200,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
3179 | int compat, incompat; | 3200 | int compat, incompat; |
3180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3201 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3181 | 3202 | ||
3182 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3203 | if (ext4_has_metadata_csum(sb)) { |
3183 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
3184 | /* journal checksum v3 */ | 3204 | /* journal checksum v3 */ |
3185 | compat = 0; | 3205 | compat = 0; |
3186 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; | 3206 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; |
@@ -3190,6 +3210,10 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
3190 | incompat = 0; | 3210 | incompat = 0; |
3191 | } | 3211 | } |
3192 | 3212 | ||
3213 | jbd2_journal_clear_features(sbi->s_journal, | ||
3214 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
3215 | JBD2_FEATURE_INCOMPAT_CSUM_V3 | | ||
3216 | JBD2_FEATURE_INCOMPAT_CSUM_V2); | ||
3193 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 3217 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { |
3194 | ret = jbd2_journal_set_features(sbi->s_journal, | 3218 | ret = jbd2_journal_set_features(sbi->s_journal, |
3195 | compat, 0, | 3219 | compat, 0, |
@@ -3202,11 +3226,8 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
3202 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 3226 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
3203 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 3227 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
3204 | } else { | 3228 | } else { |
3205 | jbd2_journal_clear_features(sbi->s_journal, | 3229 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
3206 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 3230 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
3207 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | | ||
3208 | JBD2_FEATURE_INCOMPAT_CSUM_V3 | | ||
3209 | JBD2_FEATURE_INCOMPAT_CSUM_V2); | ||
3210 | } | 3231 | } |
3211 | 3232 | ||
3212 | return ret; | 3233 | return ret; |
@@ -3436,7 +3457,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3436 | logical_sb_block = sb_block; | 3457 | logical_sb_block = sb_block; |
3437 | } | 3458 | } |
3438 | 3459 | ||
3439 | if (!(bh = sb_bread(sb, logical_sb_block))) { | 3460 | if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { |
3440 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); | 3461 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); |
3441 | goto out_fail; | 3462 | goto out_fail; |
3442 | } | 3463 | } |
@@ -3487,8 +3508,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3487 | } | 3508 | } |
3488 | 3509 | ||
3489 | /* Precompute checksum seed for all metadata */ | 3510 | /* Precompute checksum seed for all metadata */ |
3490 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3511 | if (ext4_has_metadata_csum(sb)) |
3491 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
3492 | sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, | 3512 | sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, |
3493 | sizeof(es->s_uuid)); | 3513 | sizeof(es->s_uuid)); |
3494 | 3514 | ||
@@ -3506,6 +3526,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3506 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 3526 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3507 | set_opt(sb, POSIX_ACL); | 3527 | set_opt(sb, POSIX_ACL); |
3508 | #endif | 3528 | #endif |
3529 | /* don't forget to enable journal_csum when metadata_csum is enabled. */ | ||
3530 | if (ext4_has_metadata_csum(sb)) | ||
3531 | set_opt(sb, JOURNAL_CHECKSUM); | ||
3532 | |||
3509 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 3533 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
3510 | set_opt(sb, JOURNAL_DATA); | 3534 | set_opt(sb, JOURNAL_DATA); |
3511 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 3535 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
@@ -3519,8 +3543,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3519 | set_opt(sb, ERRORS_CONT); | 3543 | set_opt(sb, ERRORS_CONT); |
3520 | else | 3544 | else |
3521 | set_opt(sb, ERRORS_RO); | 3545 | set_opt(sb, ERRORS_RO); |
3522 | if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) | 3546 | /* block_validity enabled by default; disable with noblock_validity */ |
3523 | set_opt(sb, BLOCK_VALIDITY); | 3547 | set_opt(sb, BLOCK_VALIDITY); |
3524 | if (def_mount_opts & EXT4_DEFM_DISCARD) | 3548 | if (def_mount_opts & EXT4_DEFM_DISCARD) |
3525 | set_opt(sb, DISCARD); | 3549 | set_opt(sb, DISCARD); |
3526 | 3550 | ||
@@ -3646,7 +3670,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3646 | brelse(bh); | 3670 | brelse(bh); |
3647 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; | 3671 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; |
3648 | offset = do_div(logical_sb_block, blocksize); | 3672 | offset = do_div(logical_sb_block, blocksize); |
3649 | bh = sb_bread(sb, logical_sb_block); | 3673 | bh = sb_bread_unmovable(sb, logical_sb_block); |
3650 | if (!bh) { | 3674 | if (!bh) { |
3651 | ext4_msg(sb, KERN_ERR, | 3675 | ext4_msg(sb, KERN_ERR, |
3652 | "Can't read superblock on 2nd try"); | 3676 | "Can't read superblock on 2nd try"); |
@@ -3868,7 +3892,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3868 | 3892 | ||
3869 | for (i = 0; i < db_count; i++) { | 3893 | for (i = 0; i < db_count; i++) { |
3870 | block = descriptor_loc(sb, logical_sb_block, i); | 3894 | block = descriptor_loc(sb, logical_sb_block, i); |
3871 | sbi->s_group_desc[i] = sb_bread(sb, block); | 3895 | sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); |
3872 | if (!sbi->s_group_desc[i]) { | 3896 | if (!sbi->s_group_desc[i]) { |
3873 | ext4_msg(sb, KERN_ERR, | 3897 | ext4_msg(sb, KERN_ERR, |
3874 | "can't read group descriptor %d", i); | 3898 | "can't read group descriptor %d", i); |
@@ -3890,13 +3914,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3890 | sbi->s_err_report.data = (unsigned long) sb; | 3914 | sbi->s_err_report.data = (unsigned long) sb; |
3891 | 3915 | ||
3892 | /* Register extent status tree shrinker */ | 3916 | /* Register extent status tree shrinker */ |
3893 | ext4_es_register_shrinker(sbi); | 3917 | if (ext4_es_register_shrinker(sbi)) |
3894 | |||
3895 | err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); | ||
3896 | if (err) { | ||
3897 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
3898 | goto failed_mount3; | 3918 | goto failed_mount3; |
3899 | } | ||
3900 | 3919 | ||
3901 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3920 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3902 | sbi->s_extent_max_zeroout_kb = 32; | 3921 | sbi->s_extent_max_zeroout_kb = 32; |
@@ -3904,11 +3923,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3904 | /* | 3923 | /* |
3905 | * set up enough so that it can read an inode | 3924 | * set up enough so that it can read an inode |
3906 | */ | 3925 | */ |
3907 | if (!test_opt(sb, NOLOAD) && | 3926 | sb->s_op = &ext4_sops; |
3908 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) | ||
3909 | sb->s_op = &ext4_sops; | ||
3910 | else | ||
3911 | sb->s_op = &ext4_nojournal_sops; | ||
3912 | sb->s_export_op = &ext4_export_ops; | 3927 | sb->s_export_op = &ext4_export_ops; |
3913 | sb->s_xattr = ext4_xattr_handlers; | 3928 | sb->s_xattr = ext4_xattr_handlers; |
3914 | #ifdef CONFIG_QUOTA | 3929 | #ifdef CONFIG_QUOTA |
@@ -3932,7 +3947,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3932 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && | 3947 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && |
3933 | !(sb->s_flags & MS_RDONLY)) | 3948 | !(sb->s_flags & MS_RDONLY)) |
3934 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) | 3949 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) |
3935 | goto failed_mount3; | 3950 | goto failed_mount3a; |
3936 | 3951 | ||
3937 | /* | 3952 | /* |
3938 | * The first inode we look at is the journal inode. Don't try | 3953 | * The first inode we look at is the journal inode. Don't try |
@@ -3941,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3941 | if (!test_opt(sb, NOLOAD) && | 3956 | if (!test_opt(sb, NOLOAD) && |
3942 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { | 3957 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { |
3943 | if (ext4_load_journal(sb, es, journal_devnum)) | 3958 | if (ext4_load_journal(sb, es, journal_devnum)) |
3944 | goto failed_mount3; | 3959 | goto failed_mount3a; |
3945 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && | 3960 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && |
3946 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 3961 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
3947 | ext4_msg(sb, KERN_ERR, "required journal recovery " | 3962 | ext4_msg(sb, KERN_ERR, "required journal recovery " |
@@ -4229,10 +4244,10 @@ failed_mount_wq: | |||
4229 | jbd2_journal_destroy(sbi->s_journal); | 4244 | jbd2_journal_destroy(sbi->s_journal); |
4230 | sbi->s_journal = NULL; | 4245 | sbi->s_journal = NULL; |
4231 | } | 4246 | } |
4232 | failed_mount3: | 4247 | failed_mount3a: |
4233 | ext4_es_unregister_shrinker(sbi); | 4248 | ext4_es_unregister_shrinker(sbi); |
4249 | failed_mount3: | ||
4234 | del_timer_sync(&sbi->s_err_report); | 4250 | del_timer_sync(&sbi->s_err_report); |
4235 | percpu_counter_destroy(&sbi->s_extent_cache_cnt); | ||
4236 | if (sbi->s_mmp_tsk) | 4251 | if (sbi->s_mmp_tsk) |
4237 | kthread_stop(sbi->s_mmp_tsk); | 4252 | kthread_stop(sbi->s_mmp_tsk); |
4238 | failed_mount2: | 4253 | failed_mount2: |
@@ -4247,7 +4262,7 @@ failed_mount: | |||
4247 | remove_proc_entry(sb->s_id, ext4_proc_root); | 4262 | remove_proc_entry(sb->s_id, ext4_proc_root); |
4248 | } | 4263 | } |
4249 | #ifdef CONFIG_QUOTA | 4264 | #ifdef CONFIG_QUOTA |
4250 | for (i = 0; i < MAXQUOTAS; i++) | 4265 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
4251 | kfree(sbi->s_qf_names[i]); | 4266 | kfree(sbi->s_qf_names[i]); |
4252 | #endif | 4267 | #endif |
4253 | ext4_blkdev_remove(sbi); | 4268 | ext4_blkdev_remove(sbi); |
@@ -4375,6 +4390,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
4375 | goto out_bdev; | 4390 | goto out_bdev; |
4376 | } | 4391 | } |
4377 | 4392 | ||
4393 | if ((le32_to_cpu(es->s_feature_ro_compat) & | ||
4394 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
4395 | es->s_checksum != ext4_superblock_csum(sb, es)) { | ||
4396 | ext4_msg(sb, KERN_ERR, "external journal has " | ||
4397 | "corrupt superblock"); | ||
4398 | brelse(bh); | ||
4399 | goto out_bdev; | ||
4400 | } | ||
4401 | |||
4378 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { | 4402 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { |
4379 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); | 4403 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); |
4380 | brelse(bh); | 4404 | brelse(bh); |
@@ -4677,15 +4701,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
4677 | * being sent at the end of the function. But we can skip it if | 4701 | * being sent at the end of the function. But we can skip it if |
4678 | * transaction_commit will do it for us. | 4702 | * transaction_commit will do it for us. |
4679 | */ | 4703 | */ |
4680 | target = jbd2_get_latest_transaction(sbi->s_journal); | 4704 | if (sbi->s_journal) { |
4681 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && | 4705 | target = jbd2_get_latest_transaction(sbi->s_journal); |
4682 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | 4706 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && |
4707 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | ||
4708 | needs_barrier = true; | ||
4709 | |||
4710 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | ||
4711 | if (wait) | ||
4712 | ret = jbd2_log_wait_commit(sbi->s_journal, | ||
4713 | target); | ||
4714 | } | ||
4715 | } else if (wait && test_opt(sb, BARRIER)) | ||
4683 | needs_barrier = true; | 4716 | needs_barrier = true; |
4684 | |||
4685 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | ||
4686 | if (wait) | ||
4687 | ret = jbd2_log_wait_commit(sbi->s_journal, target); | ||
4688 | } | ||
4689 | if (needs_barrier) { | 4717 | if (needs_barrier) { |
4690 | int err; | 4718 | int err; |
4691 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | 4719 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); |
@@ -4696,19 +4724,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
4696 | return ret; | 4724 | return ret; |
4697 | } | 4725 | } |
4698 | 4726 | ||
4699 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) | ||
4700 | { | ||
4701 | int ret = 0; | ||
4702 | |||
4703 | trace_ext4_sync_fs(sb, wait); | ||
4704 | flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
4705 | dquot_writeback_dquots(sb, -1); | ||
4706 | if (wait && test_opt(sb, BARRIER)) | ||
4707 | ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
4708 | |||
4709 | return ret; | ||
4710 | } | ||
4711 | |||
4712 | /* | 4727 | /* |
4713 | * LVM calls this function before a (read-only) snapshot is created. This | 4728 | * LVM calls this function before a (read-only) snapshot is created. This |
4714 | * gives us a chance to flush the journal completely and mark the fs clean. | 4729 | * gives us a chance to flush the journal completely and mark the fs clean. |
@@ -4727,23 +4742,26 @@ static int ext4_freeze(struct super_block *sb) | |||
4727 | 4742 | ||
4728 | journal = EXT4_SB(sb)->s_journal; | 4743 | journal = EXT4_SB(sb)->s_journal; |
4729 | 4744 | ||
4730 | /* Now we set up the journal barrier. */ | 4745 | if (journal) { |
4731 | jbd2_journal_lock_updates(journal); | 4746 | /* Now we set up the journal barrier. */ |
4747 | jbd2_journal_lock_updates(journal); | ||
4732 | 4748 | ||
4733 | /* | 4749 | /* |
4734 | * Don't clear the needs_recovery flag if we failed to flush | 4750 | * Don't clear the needs_recovery flag if we failed to |
4735 | * the journal. | 4751 | * flush the journal. |
4736 | */ | 4752 | */ |
4737 | error = jbd2_journal_flush(journal); | 4753 | error = jbd2_journal_flush(journal); |
4738 | if (error < 0) | 4754 | if (error < 0) |
4739 | goto out; | 4755 | goto out; |
4756 | } | ||
4740 | 4757 | ||
4741 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 4758 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
4742 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4759 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
4743 | error = ext4_commit_super(sb, 1); | 4760 | error = ext4_commit_super(sb, 1); |
4744 | out: | 4761 | out: |
4745 | /* we rely on upper layer to stop further updates */ | 4762 | if (journal) |
4746 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 4763 | /* we rely on upper layer to stop further updates */ |
4764 | jbd2_journal_unlock_updates(journal); | ||
4747 | return error; | 4765 | return error; |
4748 | } | 4766 | } |
4749 | 4767 | ||
@@ -4774,7 +4792,7 @@ struct ext4_mount_options { | |||
4774 | u32 s_min_batch_time, s_max_batch_time; | 4792 | u32 s_min_batch_time, s_max_batch_time; |
4775 | #ifdef CONFIG_QUOTA | 4793 | #ifdef CONFIG_QUOTA |
4776 | int s_jquota_fmt; | 4794 | int s_jquota_fmt; |
4777 | char *s_qf_names[MAXQUOTAS]; | 4795 | char *s_qf_names[EXT4_MAXQUOTAS]; |
4778 | #endif | 4796 | #endif |
4779 | }; | 4797 | }; |
4780 | 4798 | ||
@@ -4804,7 +4822,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4804 | old_opts.s_max_batch_time = sbi->s_max_batch_time; | 4822 | old_opts.s_max_batch_time = sbi->s_max_batch_time; |
4805 | #ifdef CONFIG_QUOTA | 4823 | #ifdef CONFIG_QUOTA |
4806 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; | 4824 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; |
4807 | for (i = 0; i < MAXQUOTAS; i++) | 4825 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
4808 | if (sbi->s_qf_names[i]) { | 4826 | if (sbi->s_qf_names[i]) { |
4809 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], | 4827 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], |
4810 | GFP_KERNEL); | 4828 | GFP_KERNEL); |
@@ -4828,6 +4846,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4828 | goto restore_opts; | 4846 | goto restore_opts; |
4829 | } | 4847 | } |
4830 | 4848 | ||
4849 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ | ||
4850 | test_opt(sb, JOURNAL_CHECKSUM)) { | ||
4851 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " | ||
4852 | "during remount not supported"); | ||
4853 | err = -EINVAL; | ||
4854 | goto restore_opts; | ||
4855 | } | ||
4856 | |||
4831 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 4857 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
4832 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | 4858 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { |
4833 | ext4_msg(sb, KERN_ERR, "can't mount with " | 4859 | ext4_msg(sb, KERN_ERR, "can't mount with " |
@@ -4965,7 +4991,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4965 | 4991 | ||
4966 | #ifdef CONFIG_QUOTA | 4992 | #ifdef CONFIG_QUOTA |
4967 | /* Release old quota file names */ | 4993 | /* Release old quota file names */ |
4968 | for (i = 0; i < MAXQUOTAS; i++) | 4994 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
4969 | kfree(old_opts.s_qf_names[i]); | 4995 | kfree(old_opts.s_qf_names[i]); |
4970 | if (enable_quota) { | 4996 | if (enable_quota) { |
4971 | if (sb_any_quota_suspended(sb)) | 4997 | if (sb_any_quota_suspended(sb)) |
@@ -4994,7 +5020,7 @@ restore_opts: | |||
4994 | sbi->s_max_batch_time = old_opts.s_max_batch_time; | 5020 | sbi->s_max_batch_time = old_opts.s_max_batch_time; |
4995 | #ifdef CONFIG_QUOTA | 5021 | #ifdef CONFIG_QUOTA |
4996 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; | 5022 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; |
4997 | for (i = 0; i < MAXQUOTAS; i++) { | 5023 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
4998 | kfree(sbi->s_qf_names[i]); | 5024 | kfree(sbi->s_qf_names[i]); |
4999 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 5025 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
5000 | } | 5026 | } |
@@ -5197,7 +5223,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, | |||
5197 | { | 5223 | { |
5198 | int err; | 5224 | int err; |
5199 | struct inode *qf_inode; | 5225 | struct inode *qf_inode; |
5200 | unsigned long qf_inums[MAXQUOTAS] = { | 5226 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
5201 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5227 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
5202 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5228 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) |
5203 | }; | 5229 | }; |
@@ -5225,13 +5251,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, | |||
5225 | static int ext4_enable_quotas(struct super_block *sb) | 5251 | static int ext4_enable_quotas(struct super_block *sb) |
5226 | { | 5252 | { |
5227 | int type, err = 0; | 5253 | int type, err = 0; |
5228 | unsigned long qf_inums[MAXQUOTAS] = { | 5254 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
5229 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5255 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
5230 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5256 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) |
5231 | }; | 5257 | }; |
5232 | 5258 | ||
5233 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; | 5259 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; |
5234 | for (type = 0; type < MAXQUOTAS; type++) { | 5260 | for (type = 0; type < EXT4_MAXQUOTAS; type++) { |
5235 | if (qf_inums[type]) { | 5261 | if (qf_inums[type]) { |
5236 | err = ext4_quota_enable(sb, type, QFMT_VFS_V1, | 5262 | err = ext4_quota_enable(sb, type, QFMT_VFS_V1, |
5237 | DQUOT_USAGE_ENABLED); | 5263 | DQUOT_USAGE_ENABLED); |
@@ -5309,7 +5335,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | |||
5309 | { | 5335 | { |
5310 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5336 | struct inode *inode = sb_dqopt(sb)->files[type]; |
5311 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 5337 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
5312 | int err = 0; | ||
5313 | int offset = off & (sb->s_blocksize - 1); | 5338 | int offset = off & (sb->s_blocksize - 1); |
5314 | int tocopy; | 5339 | int tocopy; |
5315 | size_t toread; | 5340 | size_t toread; |
@@ -5324,9 +5349,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | |||
5324 | while (toread > 0) { | 5349 | while (toread > 0) { |
5325 | tocopy = sb->s_blocksize - offset < toread ? | 5350 | tocopy = sb->s_blocksize - offset < toread ? |
5326 | sb->s_blocksize - offset : toread; | 5351 | sb->s_blocksize - offset : toread; |
5327 | bh = ext4_bread(NULL, inode, blk, 0, &err); | 5352 | bh = ext4_bread(NULL, inode, blk, 0); |
5328 | if (err) | 5353 | if (IS_ERR(bh)) |
5329 | return err; | 5354 | return PTR_ERR(bh); |
5330 | if (!bh) /* A hole? */ | 5355 | if (!bh) /* A hole? */ |
5331 | memset(data, 0, tocopy); | 5356 | memset(data, 0, tocopy); |
5332 | else | 5357 | else |
@@ -5347,8 +5372,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
5347 | { | 5372 | { |
5348 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5373 | struct inode *inode = sb_dqopt(sb)->files[type]; |
5349 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 5374 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
5350 | int err = 0; | 5375 | int err, offset = off & (sb->s_blocksize - 1); |
5351 | int offset = off & (sb->s_blocksize - 1); | ||
5352 | struct buffer_head *bh; | 5376 | struct buffer_head *bh; |
5353 | handle_t *handle = journal_current_handle(); | 5377 | handle_t *handle = journal_current_handle(); |
5354 | 5378 | ||
@@ -5369,14 +5393,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
5369 | return -EIO; | 5393 | return -EIO; |
5370 | } | 5394 | } |
5371 | 5395 | ||
5372 | bh = ext4_bread(handle, inode, blk, 1, &err); | 5396 | bh = ext4_bread(handle, inode, blk, 1); |
5397 | if (IS_ERR(bh)) | ||
5398 | return PTR_ERR(bh); | ||
5373 | if (!bh) | 5399 | if (!bh) |
5374 | goto out; | 5400 | goto out; |
5375 | BUFFER_TRACE(bh, "get write access"); | 5401 | BUFFER_TRACE(bh, "get write access"); |
5376 | err = ext4_journal_get_write_access(handle, bh); | 5402 | err = ext4_journal_get_write_access(handle, bh); |
5377 | if (err) { | 5403 | if (err) { |
5378 | brelse(bh); | 5404 | brelse(bh); |
5379 | goto out; | 5405 | return err; |
5380 | } | 5406 | } |
5381 | lock_buffer(bh); | 5407 | lock_buffer(bh); |
5382 | memcpy(bh->b_data+offset, data, len); | 5408 | memcpy(bh->b_data+offset, data, len); |
@@ -5385,8 +5411,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
5385 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | 5411 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
5386 | brelse(bh); | 5412 | brelse(bh); |
5387 | out: | 5413 | out: |
5388 | if (err) | ||
5389 | return err; | ||
5390 | if (inode->i_size < off + len) { | 5414 | if (inode->i_size < off + len) { |
5391 | i_size_write(inode, off + len); | 5415 | i_size_write(inode, off + len); |
5392 | EXT4_I(inode)->i_disksize = inode->i_size; | 5416 | EXT4_I(inode)->i_disksize = inode->i_size; |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e7387337060c..1e09fc77395c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -142,8 +142,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode, | |||
142 | sector_t block_nr, | 142 | sector_t block_nr, |
143 | struct ext4_xattr_header *hdr) | 143 | struct ext4_xattr_header *hdr) |
144 | { | 144 | { |
145 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 145 | if (ext4_has_metadata_csum(inode->i_sb) && |
146 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
147 | (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) | 146 | (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) |
148 | return 0; | 147 | return 0; |
149 | return 1; | 148 | return 1; |
@@ -153,8 +152,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode, | |||
153 | sector_t block_nr, | 152 | sector_t block_nr, |
154 | struct ext4_xattr_header *hdr) | 153 | struct ext4_xattr_header *hdr) |
155 | { | 154 | { |
156 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 155 | if (!ext4_has_metadata_csum(inode->i_sb)) |
157 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
158 | return; | 156 | return; |
159 | 157 | ||
160 | hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); | 158 | hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); |
@@ -190,14 +188,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
190 | } | 188 | } |
191 | 189 | ||
192 | static int | 190 | static int |
193 | ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) | 191 | ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, |
192 | void *value_start) | ||
194 | { | 193 | { |
195 | while (!IS_LAST_ENTRY(entry)) { | 194 | struct ext4_xattr_entry *e = entry; |
196 | struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); | 195 | |
196 | while (!IS_LAST_ENTRY(e)) { | ||
197 | struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); | ||
197 | if ((void *)next >= end) | 198 | if ((void *)next >= end) |
198 | return -EIO; | 199 | return -EIO; |
199 | entry = next; | 200 | e = next; |
200 | } | 201 | } |
202 | |||
203 | while (!IS_LAST_ENTRY(entry)) { | ||
204 | if (entry->e_value_size != 0 && | ||
205 | (value_start + le16_to_cpu(entry->e_value_offs) < | ||
206 | (void *)e + sizeof(__u32) || | ||
207 | value_start + le16_to_cpu(entry->e_value_offs) + | ||
208 | le32_to_cpu(entry->e_value_size) > end)) | ||
209 | return -EIO; | ||
210 | entry = EXT4_XATTR_NEXT(entry); | ||
211 | } | ||
212 | |||
201 | return 0; | 213 | return 0; |
202 | } | 214 | } |
203 | 215 | ||
@@ -214,7 +226,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) | |||
214 | return -EIO; | 226 | return -EIO; |
215 | if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) | 227 | if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) |
216 | return -EIO; | 228 | return -EIO; |
217 | error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); | 229 | error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, |
230 | bh->b_data); | ||
218 | if (!error) | 231 | if (!error) |
219 | set_buffer_verified(bh); | 232 | set_buffer_verified(bh); |
220 | return error; | 233 | return error; |
@@ -331,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, | |||
331 | header = IHDR(inode, raw_inode); | 344 | header = IHDR(inode, raw_inode); |
332 | entry = IFIRST(header); | 345 | entry = IFIRST(header); |
333 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; | 346 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; |
334 | error = ext4_xattr_check_names(entry, end); | 347 | error = ext4_xattr_check_names(entry, end, entry); |
335 | if (error) | 348 | if (error) |
336 | goto cleanup; | 349 | goto cleanup; |
337 | error = ext4_xattr_find_entry(&entry, name_index, name, | 350 | error = ext4_xattr_find_entry(&entry, name_index, name, |
@@ -463,7 +476,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
463 | raw_inode = ext4_raw_inode(&iloc); | 476 | raw_inode = ext4_raw_inode(&iloc); |
464 | header = IHDR(inode, raw_inode); | 477 | header = IHDR(inode, raw_inode); |
465 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; | 478 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; |
466 | error = ext4_xattr_check_names(IFIRST(header), end); | 479 | error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); |
467 | if (error) | 480 | if (error) |
468 | goto cleanup; | 481 | goto cleanup; |
469 | error = ext4_xattr_list_entries(dentry, IFIRST(header), | 482 | error = ext4_xattr_list_entries(dentry, IFIRST(header), |
@@ -899,14 +912,8 @@ inserted: | |||
899 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 912 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
900 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 913 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
901 | 914 | ||
902 | /* | ||
903 | * take i_data_sem because we will test | ||
904 | * i_delalloc_reserved_flag in ext4_mb_new_blocks | ||
905 | */ | ||
906 | down_read(&EXT4_I(inode)->i_data_sem); | ||
907 | block = ext4_new_meta_blocks(handle, inode, goal, 0, | 915 | block = ext4_new_meta_blocks(handle, inode, goal, 0, |
908 | NULL, &error); | 916 | NULL, &error); |
909 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
910 | if (error) | 917 | if (error) |
911 | goto cleanup; | 918 | goto cleanup; |
912 | 919 | ||
@@ -986,7 +993,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, | |||
986 | is->s.here = is->s.first; | 993 | is->s.here = is->s.first; |
987 | is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; | 994 | is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; |
988 | if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { | 995 | if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { |
989 | error = ext4_xattr_check_names(IFIRST(header), is->s.end); | 996 | error = ext4_xattr_check_names(IFIRST(header), is->s.end, |
997 | IFIRST(header)); | ||
990 | if (error) | 998 | if (error) |
991 | return error; | 999 | return error; |
992 | /* Find the named attribute. */ | 1000 | /* Find the named attribute. */ |
diff --git a/fs/internal.h b/fs/internal.h index 9477f8f6aefc..757ba2abf21e 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -47,7 +47,6 @@ extern void __init chrdev_init(void); | |||
47 | /* | 47 | /* |
48 | * namei.c | 48 | * namei.c |
49 | */ | 49 | */ |
50 | extern int __inode_permission(struct inode *, int); | ||
51 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); | 50 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); |
52 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, | 51 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, |
53 | const char *, unsigned int, struct path *); | 52 | const char *, unsigned int, struct path *); |
@@ -139,12 +138,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | |||
139 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); | 138 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); |
140 | 139 | ||
141 | /* | 140 | /* |
142 | * splice.c | ||
143 | */ | ||
144 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||
145 | loff_t *opos, size_t len, unsigned int flags); | ||
146 | |||
147 | /* | ||
148 | * pipe.c | 141 | * pipe.c |
149 | */ | 142 | */ |
150 | extern const struct file_operations pipefifo_fops; | 143 | extern const struct file_operations pipefifo_fops; |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 881b3bd0143f..fe839b915116 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -29,13 +29,9 @@ | |||
29 | #define BEQUIET | 29 | #define BEQUIET |
30 | 30 | ||
31 | static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); | 31 | static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); |
32 | static int isofs_hash(const struct dentry *parent, struct qstr *qstr); | ||
33 | static int isofs_dentry_cmpi(const struct dentry *parent, | 32 | static int isofs_dentry_cmpi(const struct dentry *parent, |
34 | const struct dentry *dentry, | 33 | const struct dentry *dentry, |
35 | unsigned int len, const char *str, const struct qstr *name); | 34 | unsigned int len, const char *str, const struct qstr *name); |
36 | static int isofs_dentry_cmp(const struct dentry *parent, | ||
37 | const struct dentry *dentry, | ||
38 | unsigned int len, const char *str, const struct qstr *name); | ||
39 | 35 | ||
40 | #ifdef CONFIG_JOLIET | 36 | #ifdef CONFIG_JOLIET |
41 | static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); | 37 | static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); |
@@ -135,10 +131,6 @@ static const struct super_operations isofs_sops = { | |||
135 | 131 | ||
136 | static const struct dentry_operations isofs_dentry_ops[] = { | 132 | static const struct dentry_operations isofs_dentry_ops[] = { |
137 | { | 133 | { |
138 | .d_hash = isofs_hash, | ||
139 | .d_compare = isofs_dentry_cmp, | ||
140 | }, | ||
141 | { | ||
142 | .d_hash = isofs_hashi, | 134 | .d_hash = isofs_hashi, |
143 | .d_compare = isofs_dentry_cmpi, | 135 | .d_compare = isofs_dentry_cmpi, |
144 | }, | 136 | }, |
@@ -258,25 +250,12 @@ static int isofs_dentry_cmp_common( | |||
258 | } | 250 | } |
259 | 251 | ||
260 | static int | 252 | static int |
261 | isofs_hash(const struct dentry *dentry, struct qstr *qstr) | ||
262 | { | ||
263 | return isofs_hash_common(qstr, 0); | ||
264 | } | ||
265 | |||
266 | static int | ||
267 | isofs_hashi(const struct dentry *dentry, struct qstr *qstr) | 253 | isofs_hashi(const struct dentry *dentry, struct qstr *qstr) |
268 | { | 254 | { |
269 | return isofs_hashi_common(qstr, 0); | 255 | return isofs_hashi_common(qstr, 0); |
270 | } | 256 | } |
271 | 257 | ||
272 | static int | 258 | static int |
273 | isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry, | ||
274 | unsigned int len, const char *str, const struct qstr *name) | ||
275 | { | ||
276 | return isofs_dentry_cmp_common(len, str, name, 0, 0); | ||
277 | } | ||
278 | |||
279 | static int | ||
280 | isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, | 259 | isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, |
281 | unsigned int len, const char *str, const struct qstr *name) | 260 | unsigned int len, const char *str, const struct qstr *name) |
282 | { | 261 | { |
@@ -930,7 +909,8 @@ root_found: | |||
930 | if (opt.check == 'r') | 909 | if (opt.check == 'r') |
931 | table++; | 910 | table++; |
932 | 911 | ||
933 | s->s_d_op = &isofs_dentry_ops[table]; | 912 | if (table) |
913 | s->s_d_op = &isofs_dentry_ops[table - 1]; | ||
934 | 914 | ||
935 | /* get the root dentry */ | 915 | /* get the root dentry */ |
936 | s->s_root = d_make_root(inode); | 916 | s->s_root = d_make_root(inode); |
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 95295640d9c8..7b543e6b6526 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c | |||
@@ -18,25 +18,10 @@ static int | |||
18 | isofs_cmp(struct dentry *dentry, const char *compare, int dlen) | 18 | isofs_cmp(struct dentry *dentry, const char *compare, int dlen) |
19 | { | 19 | { |
20 | struct qstr qstr; | 20 | struct qstr qstr; |
21 | |||
22 | if (!compare) | ||
23 | return 1; | ||
24 | |||
25 | /* check special "." and ".." files */ | ||
26 | if (dlen == 1) { | ||
27 | /* "." */ | ||
28 | if (compare[0] == 0) { | ||
29 | if (!dentry->d_name.len) | ||
30 | return 0; | ||
31 | compare = "."; | ||
32 | } else if (compare[0] == 1) { | ||
33 | compare = ".."; | ||
34 | dlen = 2; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | qstr.name = compare; | 21 | qstr.name = compare; |
39 | qstr.len = dlen; | 22 | qstr.len = dlen; |
23 | if (likely(!dentry->d_op)) | ||
24 | return dentry->d_name.len != dlen || memcmp(dentry->d_name.name, compare, dlen); | ||
40 | return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); | 25 | return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); |
41 | } | 26 | } |
42 | 27 | ||
@@ -146,7 +131,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
146 | (!(de->flags[-sbi->s_high_sierra] & 1))) && | 131 | (!(de->flags[-sbi->s_high_sierra] & 1))) && |
147 | (sbi->s_showassoc || | 132 | (sbi->s_showassoc || |
148 | (!(de->flags[-sbi->s_high_sierra] & 4)))) { | 133 | (!(de->flags[-sbi->s_high_sierra] & 4)))) { |
149 | match = (isofs_cmp(dentry, dpnt, dlen) == 0); | 134 | if (dpnt && (dlen > 1 || dpnt[0] > 1)) |
135 | match = (isofs_cmp(dentry, dpnt, dlen) == 0); | ||
150 | } | 136 | } |
151 | if (match) { | 137 | if (match) { |
152 | isofs_normalize_block_and_offset(de, | 138 | isofs_normalize_block_and_offset(de, |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 06fe11e0abfa..aab8549591e7 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -886,7 +886,7 @@ journal_t * journal_init_inode (struct inode *inode) | |||
886 | goto out_err; | 886 | goto out_err; |
887 | } | 887 | } |
888 | 888 | ||
889 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 889 | bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); |
890 | if (!bh) { | 890 | if (!bh) { |
891 | printk(KERN_ERR | 891 | printk(KERN_ERR |
892 | "%s: Cannot get buffer for journal superblock\n", | 892 | "%s: Cannot get buffer for journal superblock\n", |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 8898bbd2b61e..dcead636c33b 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -93,6 +93,7 @@ | |||
93 | #include <linux/bio.h> | 93 | #include <linux/bio.h> |
94 | #endif | 94 | #endif |
95 | #include <linux/log2.h> | 95 | #include <linux/log2.h> |
96 | #include <linux/hash.h> | ||
96 | 97 | ||
97 | static struct kmem_cache *revoke_record_cache; | 98 | static struct kmem_cache *revoke_record_cache; |
98 | static struct kmem_cache *revoke_table_cache; | 99 | static struct kmem_cache *revoke_table_cache; |
@@ -129,15 +130,11 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int); | |||
129 | 130 | ||
130 | /* Utility functions to maintain the revoke table */ | 131 | /* Utility functions to maintain the revoke table */ |
131 | 132 | ||
132 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ | ||
133 | static inline int hash(journal_t *journal, unsigned int block) | 133 | static inline int hash(journal_t *journal, unsigned int block) |
134 | { | 134 | { |
135 | struct jbd_revoke_table_s *table = journal->j_revoke; | 135 | struct jbd_revoke_table_s *table = journal->j_revoke; |
136 | int hash_shift = table->hash_shift; | ||
137 | 136 | ||
138 | return ((block << (hash_shift - 6)) ^ | 137 | return hash_32(block, table->hash_shift); |
139 | (block >> 13) ^ | ||
140 | (block << (hash_shift - 12))) & (table->hash_size - 1); | ||
141 | } | 138 | } |
142 | 139 | ||
143 | static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, | 140 | static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 7f34f4716165..988b32ed4c87 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
96 | 96 | ||
97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && | 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && |
98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
99 | /* | ||
100 | * Get our reference so that bh cannot be freed before | ||
101 | * we unlock it | ||
102 | */ | ||
103 | get_bh(bh); | ||
104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
105 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
106 | BUFFER_TRACE(bh, "release"); | ||
107 | __brelse(bh); | ||
108 | } | 101 | } |
109 | return ret; | 102 | return ret; |
110 | } | 103 | } |
@@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
122 | 115 | ||
123 | nblocks = jbd2_space_needed(journal); | 116 | nblocks = jbd2_space_needed(journal); |
124 | while (jbd2_log_space_left(journal) < nblocks) { | 117 | while (jbd2_log_space_left(journal) < nblocks) { |
125 | if (journal->j_flags & JBD2_ABORT) | ||
126 | return; | ||
127 | write_unlock(&journal->j_state_lock); | 118 | write_unlock(&journal->j_state_lock); |
128 | mutex_lock(&journal->j_checkpoint_mutex); | 119 | mutex_lock(&journal->j_checkpoint_mutex); |
129 | 120 | ||
@@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
139 | * trace for forensic evidence. | 130 | * trace for forensic evidence. |
140 | */ | 131 | */ |
141 | write_lock(&journal->j_state_lock); | 132 | write_lock(&journal->j_state_lock); |
133 | if (journal->j_flags & JBD2_ABORT) { | ||
134 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
135 | return; | ||
136 | } | ||
142 | spin_lock(&journal->j_list_lock); | 137 | spin_lock(&journal->j_list_lock); |
143 | nblocks = jbd2_space_needed(journal); | 138 | nblocks = jbd2_space_needed(journal); |
144 | space_left = jbd2_log_space_left(journal); | 139 | space_left = jbd2_log_space_left(journal); |
@@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
183 | } | 178 | } |
184 | } | 179 | } |
185 | 180 | ||
186 | /* | ||
187 | * Clean up transaction's list of buffers submitted for io. | ||
188 | * We wait for any pending IO to complete and remove any clean | ||
189 | * buffers. Note that we take the buffers in the opposite ordering | ||
190 | * from the one in which they were submitted for IO. | ||
191 | * | ||
192 | * Return 0 on success, and return <0 if some buffers have failed | ||
193 | * to be written out. | ||
194 | * | ||
195 | * Called with j_list_lock held. | ||
196 | */ | ||
197 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) | ||
198 | { | ||
199 | struct journal_head *jh; | ||
200 | struct buffer_head *bh; | ||
201 | tid_t this_tid; | ||
202 | int released = 0; | ||
203 | int ret = 0; | ||
204 | |||
205 | this_tid = transaction->t_tid; | ||
206 | restart: | ||
207 | /* Did somebody clean up the transaction in the meanwhile? */ | ||
208 | if (journal->j_checkpoint_transactions != transaction || | ||
209 | transaction->t_tid != this_tid) | ||
210 | return ret; | ||
211 | while (!released && transaction->t_checkpoint_io_list) { | ||
212 | jh = transaction->t_checkpoint_io_list; | ||
213 | bh = jh2bh(jh); | ||
214 | get_bh(bh); | ||
215 | if (buffer_locked(bh)) { | ||
216 | spin_unlock(&journal->j_list_lock); | ||
217 | wait_on_buffer(bh); | ||
218 | /* the journal_head may have gone by now */ | ||
219 | BUFFER_TRACE(bh, "brelse"); | ||
220 | __brelse(bh); | ||
221 | spin_lock(&journal->j_list_lock); | ||
222 | goto restart; | ||
223 | } | ||
224 | if (unlikely(buffer_write_io_error(bh))) | ||
225 | ret = -EIO; | ||
226 | |||
227 | /* | ||
228 | * Now in whatever state the buffer currently is, we know that | ||
229 | * it has been written out and so we can drop it from the list | ||
230 | */ | ||
231 | released = __jbd2_journal_remove_checkpoint(jh); | ||
232 | __brelse(bh); | ||
233 | } | ||
234 | |||
235 | return ret; | ||
236 | } | ||
237 | |||
238 | static void | 181 | static void |
239 | __flush_batch(journal_t *journal, int *batch_count) | 182 | __flush_batch(journal_t *journal, int *batch_count) |
240 | { | 183 | { |
@@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
255 | } | 198 | } |
256 | 199 | ||
257 | /* | 200 | /* |
258 | * Try to flush one buffer from the checkpoint list to disk. | ||
259 | * | ||
260 | * Return 1 if something happened which requires us to abort the current | ||
261 | * scan of the checkpoint list. Return <0 if the buffer has failed to | ||
262 | * be written out. | ||
263 | * | ||
264 | * Called with j_list_lock held and drops it if 1 is returned | ||
265 | */ | ||
266 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | ||
267 | int *batch_count, transaction_t *transaction) | ||
268 | { | ||
269 | struct buffer_head *bh = jh2bh(jh); | ||
270 | int ret = 0; | ||
271 | |||
272 | if (buffer_locked(bh)) { | ||
273 | get_bh(bh); | ||
274 | spin_unlock(&journal->j_list_lock); | ||
275 | wait_on_buffer(bh); | ||
276 | /* the journal_head may have gone by now */ | ||
277 | BUFFER_TRACE(bh, "brelse"); | ||
278 | __brelse(bh); | ||
279 | ret = 1; | ||
280 | } else if (jh->b_transaction != NULL) { | ||
281 | transaction_t *t = jh->b_transaction; | ||
282 | tid_t tid = t->t_tid; | ||
283 | |||
284 | transaction->t_chp_stats.cs_forced_to_close++; | ||
285 | spin_unlock(&journal->j_list_lock); | ||
286 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | ||
287 | /* | ||
288 | * The journal thread is dead; so starting and | ||
289 | * waiting for a commit to finish will cause | ||
290 | * us to wait for a _very_ long time. | ||
291 | */ | ||
292 | printk(KERN_ERR "JBD2: %s: " | ||
293 | "Waiting for Godot: block %llu\n", | ||
294 | journal->j_devname, | ||
295 | (unsigned long long) bh->b_blocknr); | ||
296 | jbd2_log_start_commit(journal, tid); | ||
297 | jbd2_log_wait_commit(journal, tid); | ||
298 | ret = 1; | ||
299 | } else if (!buffer_dirty(bh)) { | ||
300 | ret = 1; | ||
301 | if (unlikely(buffer_write_io_error(bh))) | ||
302 | ret = -EIO; | ||
303 | get_bh(bh); | ||
304 | BUFFER_TRACE(bh, "remove from checkpoint"); | ||
305 | __jbd2_journal_remove_checkpoint(jh); | ||
306 | spin_unlock(&journal->j_list_lock); | ||
307 | __brelse(bh); | ||
308 | } else { | ||
309 | /* | ||
310 | * Important: we are about to write the buffer, and | ||
311 | * possibly block, while still holding the journal lock. | ||
312 | * We cannot afford to let the transaction logic start | ||
313 | * messing around with this buffer before we write it to | ||
314 | * disk, as that would break recoverability. | ||
315 | */ | ||
316 | BUFFER_TRACE(bh, "queue"); | ||
317 | get_bh(bh); | ||
318 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | ||
319 | journal->j_chkpt_bhs[*batch_count] = bh; | ||
320 | __buffer_relink_io(jh); | ||
321 | transaction->t_chp_stats.cs_written++; | ||
322 | (*batch_count)++; | ||
323 | if (*batch_count == JBD2_NR_BATCH) { | ||
324 | spin_unlock(&journal->j_list_lock); | ||
325 | __flush_batch(journal, batch_count); | ||
326 | ret = 1; | ||
327 | } | ||
328 | } | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Perform an actual checkpoint. We take the first transaction on the | 201 | * Perform an actual checkpoint. We take the first transaction on the |
334 | * list of transactions to be checkpointed and send all its buffers | 202 | * list of transactions to be checkpointed and send all its buffers |
335 | * to disk. We submit larger chunks of data at once. | 203 | * to disk. We submit larger chunks of data at once. |
@@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
339 | */ | 207 | */ |
340 | int jbd2_log_do_checkpoint(journal_t *journal) | 208 | int jbd2_log_do_checkpoint(journal_t *journal) |
341 | { | 209 | { |
342 | transaction_t *transaction; | 210 | struct journal_head *jh; |
343 | tid_t this_tid; | 211 | struct buffer_head *bh; |
344 | int result; | 212 | transaction_t *transaction; |
213 | tid_t this_tid; | ||
214 | int result, batch_count = 0; | ||
345 | 215 | ||
346 | jbd_debug(1, "Start checkpoint\n"); | 216 | jbd_debug(1, "Start checkpoint\n"); |
347 | 217 | ||
@@ -374,45 +244,117 @@ restart: | |||
374 | * done (maybe it's a new transaction, but it fell at the same | 244 | * done (maybe it's a new transaction, but it fell at the same |
375 | * address). | 245 | * address). |
376 | */ | 246 | */ |
377 | if (journal->j_checkpoint_transactions == transaction && | 247 | if (journal->j_checkpoint_transactions != transaction || |
378 | transaction->t_tid == this_tid) { | 248 | transaction->t_tid != this_tid) |
379 | int batch_count = 0; | 249 | goto out; |
380 | struct journal_head *jh; | 250 | |
381 | int retry = 0, err; | 251 | /* checkpoint all of the transaction's buffers */ |
382 | 252 | while (transaction->t_checkpoint_list) { | |
383 | while (!retry && transaction->t_checkpoint_list) { | 253 | jh = transaction->t_checkpoint_list; |
384 | jh = transaction->t_checkpoint_list; | 254 | bh = jh2bh(jh); |
385 | retry = __process_buffer(journal, jh, &batch_count, | 255 | |
386 | transaction); | 256 | if (buffer_locked(bh)) { |
387 | if (retry < 0 && !result) | 257 | spin_unlock(&journal->j_list_lock); |
388 | result = retry; | 258 | get_bh(bh); |
389 | if (!retry && (need_resched() || | 259 | wait_on_buffer(bh); |
390 | spin_needbreak(&journal->j_list_lock))) { | 260 | /* the journal_head may have gone by now */ |
391 | spin_unlock(&journal->j_list_lock); | 261 | BUFFER_TRACE(bh, "brelse"); |
392 | retry = 1; | 262 | __brelse(bh); |
393 | break; | 263 | goto retry; |
394 | } | ||
395 | } | 264 | } |
265 | if (jh->b_transaction != NULL) { | ||
266 | transaction_t *t = jh->b_transaction; | ||
267 | tid_t tid = t->t_tid; | ||
396 | 268 | ||
397 | if (batch_count) { | 269 | transaction->t_chp_stats.cs_forced_to_close++; |
398 | if (!retry) { | 270 | spin_unlock(&journal->j_list_lock); |
399 | spin_unlock(&journal->j_list_lock); | 271 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) |
400 | retry = 1; | 272 | /* |
401 | } | 273 | * The journal thread is dead; so |
402 | __flush_batch(journal, &batch_count); | 274 | * starting and waiting for a commit |
275 | * to finish will cause us to wait for | ||
276 | * a _very_ long time. | ||
277 | */ | ||
278 | printk(KERN_ERR | ||
279 | "JBD2: %s: Waiting for Godot: block %llu\n", | ||
280 | journal->j_devname, (unsigned long long) bh->b_blocknr); | ||
281 | |||
282 | jbd2_log_start_commit(journal, tid); | ||
283 | jbd2_log_wait_commit(journal, tid); | ||
284 | goto retry; | ||
285 | } | ||
286 | if (!buffer_dirty(bh)) { | ||
287 | if (unlikely(buffer_write_io_error(bh)) && !result) | ||
288 | result = -EIO; | ||
289 | BUFFER_TRACE(bh, "remove from checkpoint"); | ||
290 | if (__jbd2_journal_remove_checkpoint(jh)) | ||
291 | /* The transaction was released; we're done */ | ||
292 | goto out; | ||
293 | continue; | ||
403 | } | 294 | } |
295 | /* | ||
296 | * Important: we are about to write the buffer, and | ||
297 | * possibly block, while still holding the journal | ||
298 | * lock. We cannot afford to let the transaction | ||
299 | * logic start messing around with this buffer before | ||
300 | * we write it to disk, as that would break | ||
301 | * recoverability. | ||
302 | */ | ||
303 | BUFFER_TRACE(bh, "queue"); | ||
304 | get_bh(bh); | ||
305 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | ||
306 | journal->j_chkpt_bhs[batch_count++] = bh; | ||
307 | __buffer_relink_io(jh); | ||
308 | transaction->t_chp_stats.cs_written++; | ||
309 | if ((batch_count == JBD2_NR_BATCH) || | ||
310 | need_resched() || | ||
311 | spin_needbreak(&journal->j_list_lock)) | ||
312 | goto unlock_and_flush; | ||
313 | } | ||
404 | 314 | ||
405 | if (retry) { | 315 | if (batch_count) { |
316 | unlock_and_flush: | ||
317 | spin_unlock(&journal->j_list_lock); | ||
318 | retry: | ||
319 | if (batch_count) | ||
320 | __flush_batch(journal, &batch_count); | ||
406 | spin_lock(&journal->j_list_lock); | 321 | spin_lock(&journal->j_list_lock); |
407 | goto restart; | 322 | goto restart; |
323 | } | ||
324 | |||
325 | /* | ||
326 | * Now we issued all of the transaction's buffers, let's deal | ||
327 | * with the buffers that are out for I/O. | ||
328 | */ | ||
329 | restart2: | ||
330 | /* Did somebody clean up the transaction in the meanwhile? */ | ||
331 | if (journal->j_checkpoint_transactions != transaction || | ||
332 | transaction->t_tid != this_tid) | ||
333 | goto out; | ||
334 | |||
335 | while (transaction->t_checkpoint_io_list) { | ||
336 | jh = transaction->t_checkpoint_io_list; | ||
337 | bh = jh2bh(jh); | ||
338 | if (buffer_locked(bh)) { | ||
339 | spin_unlock(&journal->j_list_lock); | ||
340 | get_bh(bh); | ||
341 | wait_on_buffer(bh); | ||
342 | /* the journal_head may have gone by now */ | ||
343 | BUFFER_TRACE(bh, "brelse"); | ||
344 | __brelse(bh); | ||
345 | spin_lock(&journal->j_list_lock); | ||
346 | goto restart2; | ||
408 | } | 347 | } |
348 | if (unlikely(buffer_write_io_error(bh)) && !result) | ||
349 | result = -EIO; | ||
350 | |||
409 | /* | 351 | /* |
410 | * Now we have cleaned up the first transaction's checkpoint | 352 | * Now in whatever state the buffer currently is, we |
411 | * list. Let's clean up the second one | 353 | * know that it has been written out and so we can |
354 | * drop it from the list | ||
412 | */ | 355 | */ |
413 | err = __wait_cp_io(journal, transaction); | 356 | if (__jbd2_journal_remove_checkpoint(jh)) |
414 | if (!result) | 357 | break; |
415 | result = err; | ||
416 | } | 358 | } |
417 | out: | 359 | out: |
418 | spin_unlock(&journal->j_list_lock); | 360 | spin_unlock(&journal->j_list_lock); |
@@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
478 | * Find all the written-back checkpoint buffers in the given list and | 420 | * Find all the written-back checkpoint buffers in the given list and |
479 | * release them. | 421 | * release them. |
480 | * | 422 | * |
481 | * Called with the journal locked. | ||
482 | * Called with j_list_lock held. | 423 | * Called with j_list_lock held. |
483 | * Returns number of buffers reaped (for debug) | 424 | * Returns 1 if we freed the transaction, 0 otherwise. |
484 | */ | 425 | */ |
485 | 426 | static int journal_clean_one_cp_list(struct journal_head *jh) | |
486 | static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | ||
487 | { | 427 | { |
488 | struct journal_head *last_jh; | 428 | struct journal_head *last_jh; |
489 | struct journal_head *next_jh = jh; | 429 | struct journal_head *next_jh = jh; |
490 | int ret, freed = 0; | 430 | int ret; |
431 | int freed = 0; | ||
491 | 432 | ||
492 | *released = 0; | ||
493 | if (!jh) | 433 | if (!jh) |
494 | return 0; | 434 | return 0; |
495 | 435 | ||
@@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
498 | jh = next_jh; | 438 | jh = next_jh; |
499 | next_jh = jh->b_cpnext; | 439 | next_jh = jh->b_cpnext; |
500 | ret = __try_to_free_cp_buf(jh); | 440 | ret = __try_to_free_cp_buf(jh); |
501 | if (ret) { | 441 | if (!ret) |
502 | freed++; | 442 | return freed; |
503 | if (ret == 2) { | 443 | if (ret == 2) |
504 | *released = 1; | 444 | return 1; |
505 | return freed; | 445 | freed = 1; |
506 | } | ||
507 | } | ||
508 | /* | 446 | /* |
509 | * This function only frees up some memory | 447 | * This function only frees up some memory |
510 | * if possible so we dont have an obligation | 448 | * if possible so we dont have an obligation |
@@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
523 | * | 461 | * |
524 | * Find all the written-back checkpoint buffers in the journal and release them. | 462 | * Find all the written-back checkpoint buffers in the journal and release them. |
525 | * | 463 | * |
526 | * Called with the journal locked. | ||
527 | * Called with j_list_lock held. | 464 | * Called with j_list_lock held. |
528 | * Returns number of buffers reaped (for debug) | ||
529 | */ | 465 | */ |
530 | 466 | void __jbd2_journal_clean_checkpoint_list(journal_t *journal) | |
531 | int __jbd2_journal_clean_checkpoint_list(journal_t *journal) | ||
532 | { | 467 | { |
533 | transaction_t *transaction, *last_transaction, *next_transaction; | 468 | transaction_t *transaction, *last_transaction, *next_transaction; |
534 | int ret = 0; | 469 | int ret; |
535 | int released; | ||
536 | 470 | ||
537 | transaction = journal->j_checkpoint_transactions; | 471 | transaction = journal->j_checkpoint_transactions; |
538 | if (!transaction) | 472 | if (!transaction) |
539 | goto out; | 473 | return; |
540 | 474 | ||
541 | last_transaction = transaction->t_cpprev; | 475 | last_transaction = transaction->t_cpprev; |
542 | next_transaction = transaction; | 476 | next_transaction = transaction; |
543 | do { | 477 | do { |
544 | transaction = next_transaction; | 478 | transaction = next_transaction; |
545 | next_transaction = transaction->t_cpnext; | 479 | next_transaction = transaction->t_cpnext; |
546 | ret += journal_clean_one_cp_list(transaction-> | 480 | ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); |
547 | t_checkpoint_list, &released); | ||
548 | /* | 481 | /* |
549 | * This function only frees up some memory if possible so we | 482 | * This function only frees up some memory if possible so we |
550 | * dont have an obligation to finish processing. Bail out if | 483 | * dont have an obligation to finish processing. Bail out if |
551 | * preemption requested: | 484 | * preemption requested: |
552 | */ | 485 | */ |
553 | if (need_resched()) | 486 | if (need_resched()) |
554 | goto out; | 487 | return; |
555 | if (released) | 488 | if (ret) |
556 | continue; | 489 | continue; |
557 | /* | 490 | /* |
558 | * It is essential that we are as careful as in the case of | 491 | * It is essential that we are as careful as in the case of |
559 | * t_checkpoint_list with removing the buffer from the list as | 492 | * t_checkpoint_list with removing the buffer from the list as |
560 | * we can possibly see not yet submitted buffers on io_list | 493 | * we can possibly see not yet submitted buffers on io_list |
561 | */ | 494 | */ |
562 | ret += journal_clean_one_cp_list(transaction-> | 495 | ret = journal_clean_one_cp_list(transaction-> |
563 | t_checkpoint_io_list, &released); | 496 | t_checkpoint_io_list); |
564 | if (need_resched()) | 497 | if (need_resched()) |
565 | goto out; | 498 | return; |
499 | /* | ||
500 | * Stop scanning if we couldn't free the transaction. This | ||
501 | * avoids pointless scanning of transactions which still | ||
502 | * weren't checkpointed. | ||
503 | */ | ||
504 | if (!ret) | ||
505 | return; | ||
566 | } while (transaction != last_transaction); | 506 | } while (transaction != last_transaction); |
567 | out: | ||
568 | return ret; | ||
569 | } | 507 | } |
570 | 508 | ||
571 | /* | 509 | /* |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 19d74d86d99c..e4dc74713a43 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1237,7 +1237,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
1237 | goto out_err; | 1237 | goto out_err; |
1238 | } | 1238 | } |
1239 | 1239 | ||
1240 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 1240 | bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); |
1241 | if (!bh) { | 1241 | if (!bh) { |
1242 | printk(KERN_ERR | 1242 | printk(KERN_ERR |
1243 | "%s: Cannot get buffer for journal superblock\n", | 1243 | "%s: Cannot get buffer for journal superblock\n", |
@@ -1522,14 +1522,6 @@ static int journal_get_superblock(journal_t *journal) | |||
1522 | goto out; | 1522 | goto out; |
1523 | } | 1523 | } |
1524 | 1524 | ||
1525 | if (jbd2_journal_has_csum_v2or3(journal) && | ||
1526 | JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
1527 | /* Can't have checksum v1 and v2 on at the same time! */ | ||
1528 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " | ||
1529 | "at the same time!\n"); | ||
1530 | goto out; | ||
1531 | } | ||
1532 | |||
1533 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && | 1525 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && |
1534 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { | 1526 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { |
1535 | /* Can't have checksum v2 and v3 at the same time! */ | 1527 | /* Can't have checksum v2 and v3 at the same time! */ |
@@ -1538,6 +1530,14 @@ static int journal_get_superblock(journal_t *journal) | |||
1538 | goto out; | 1530 | goto out; |
1539 | } | 1531 | } |
1540 | 1532 | ||
1533 | if (jbd2_journal_has_csum_v2or3(journal) && | ||
1534 | JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
1535 | /* Can't have checksum v1 and v2 on at the same time! */ | ||
1536 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " | ||
1537 | "at the same time!\n"); | ||
1538 | goto out; | ||
1539 | } | ||
1540 | |||
1541 | if (!jbd2_verify_csum_type(journal, sb)) { | 1541 | if (!jbd2_verify_csum_type(journal, sb)) { |
1542 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); | 1542 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); |
1543 | goto out; | 1543 | goto out; |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 9b329b55ffe3..bcbef08a4d8f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal, | |||
525 | !jbd2_descr_block_csum_verify(journal, | 525 | !jbd2_descr_block_csum_verify(journal, |
526 | bh->b_data)) { | 526 | bh->b_data)) { |
527 | err = -EIO; | 527 | err = -EIO; |
528 | brelse(bh); | ||
528 | goto failed; | 529 | goto failed; |
529 | } | 530 | } |
530 | 531 | ||
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index d5e95a175c92..c6cbaef2bda1 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -92,6 +92,7 @@ | |||
92 | #include <linux/init.h> | 92 | #include <linux/init.h> |
93 | #include <linux/bio.h> | 93 | #include <linux/bio.h> |
94 | #include <linux/log2.h> | 94 | #include <linux/log2.h> |
95 | #include <linux/hash.h> | ||
95 | #endif | 96 | #endif |
96 | 97 | ||
97 | static struct kmem_cache *jbd2_revoke_record_cache; | 98 | static struct kmem_cache *jbd2_revoke_record_cache; |
@@ -130,16 +131,9 @@ static void flush_descriptor(journal_t *, struct buffer_head *, int, int); | |||
130 | 131 | ||
131 | /* Utility functions to maintain the revoke table */ | 132 | /* Utility functions to maintain the revoke table */ |
132 | 133 | ||
133 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ | ||
134 | static inline int hash(journal_t *journal, unsigned long long block) | 134 | static inline int hash(journal_t *journal, unsigned long long block) |
135 | { | 135 | { |
136 | struct jbd2_revoke_table_s *table = journal->j_revoke; | 136 | return hash_64(block, journal->j_revoke->hash_shift); |
137 | int hash_shift = table->hash_shift; | ||
138 | int hash = (int)block ^ (int)((block >> 31) >> 1); | ||
139 | |||
140 | return ((hash << (hash_shift - 6)) ^ | ||
141 | (hash >> 13) ^ | ||
142 | (hash << (hash_shift - 12))) & (table->hash_size - 1); | ||
143 | } | 137 | } |
144 | 138 | ||
145 | static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, | 139 | static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, |
diff --git a/fs/namei.c b/fs/namei.c index 43927d14db67..db5fe86319e6 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -416,6 +416,7 @@ int __inode_permission(struct inode *inode, int mask) | |||
416 | 416 | ||
417 | return security_inode_permission(inode, mask); | 417 | return security_inode_permission(inode, mask); |
418 | } | 418 | } |
419 | EXPORT_SYMBOL(__inode_permission); | ||
419 | 420 | ||
420 | /** | 421 | /** |
421 | * sb_permission - Check superblock-level permissions | 422 | * sb_permission - Check superblock-level permissions |
@@ -2383,22 +2384,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path, | |||
2383 | } | 2384 | } |
2384 | EXPORT_SYMBOL(kern_path_mountpoint); | 2385 | EXPORT_SYMBOL(kern_path_mountpoint); |
2385 | 2386 | ||
2386 | /* | 2387 | int __check_sticky(struct inode *dir, struct inode *inode) |
2387 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
2388 | * minimal. | ||
2389 | */ | ||
2390 | static inline int check_sticky(struct inode *dir, struct inode *inode) | ||
2391 | { | 2388 | { |
2392 | kuid_t fsuid = current_fsuid(); | 2389 | kuid_t fsuid = current_fsuid(); |
2393 | 2390 | ||
2394 | if (!(dir->i_mode & S_ISVTX)) | ||
2395 | return 0; | ||
2396 | if (uid_eq(inode->i_uid, fsuid)) | 2391 | if (uid_eq(inode->i_uid, fsuid)) |
2397 | return 0; | 2392 | return 0; |
2398 | if (uid_eq(dir->i_uid, fsuid)) | 2393 | if (uid_eq(dir->i_uid, fsuid)) |
2399 | return 0; | 2394 | return 0; |
2400 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); | 2395 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); |
2401 | } | 2396 | } |
2397 | EXPORT_SYMBOL(__check_sticky); | ||
2402 | 2398 | ||
2403 | /* | 2399 | /* |
2404 | * Check whether we can remove a link victim from directory dir, check | 2400 | * Check whether we can remove a link victim from directory dir, check |
@@ -2501,7 +2497,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) | |||
2501 | } | 2497 | } |
2502 | 2498 | ||
2503 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); | 2499 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); |
2504 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); | 2500 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2); |
2505 | return NULL; | 2501 | return NULL; |
2506 | } | 2502 | } |
2507 | EXPORT_SYMBOL(lock_rename); | 2503 | EXPORT_SYMBOL(lock_rename); |
@@ -3064,9 +3060,12 @@ finish_open_created: | |||
3064 | error = may_open(&nd->path, acc_mode, open_flag); | 3060 | error = may_open(&nd->path, acc_mode, open_flag); |
3065 | if (error) | 3061 | if (error) |
3066 | goto out; | 3062 | goto out; |
3067 | file->f_path.mnt = nd->path.mnt; | 3063 | |
3068 | error = finish_open(file, nd->path.dentry, NULL, opened); | 3064 | BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ |
3069 | if (error) { | 3065 | error = vfs_open(&nd->path, file, current_cred()); |
3066 | if (!error) { | ||
3067 | *opened |= FILE_OPENED; | ||
3068 | } else { | ||
3070 | if (error == -EOPENSTALE) | 3069 | if (error == -EOPENSTALE) |
3071 | goto stale_open; | 3070 | goto stale_open; |
3072 | goto out; | 3071 | goto out; |
@@ -3155,7 +3154,8 @@ static int do_tmpfile(int dfd, struct filename *pathname, | |||
3155 | if (error) | 3154 | if (error) |
3156 | goto out2; | 3155 | goto out2; |
3157 | audit_inode(pathname, nd->path.dentry, 0); | 3156 | audit_inode(pathname, nd->path.dentry, 0); |
3158 | error = may_open(&nd->path, op->acc_mode, op->open_flag); | 3157 | /* Don't check for other permissions, the inode was just created */ |
3158 | error = may_open(&nd->path, MAY_OPEN, op->open_flag); | ||
3159 | if (error) | 3159 | if (error) |
3160 | goto out2; | 3160 | goto out2; |
3161 | file->f_path.mnt = nd->path.mnt; | 3161 | file->f_path.mnt = nd->path.mnt; |
@@ -4210,12 +4210,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, | |||
4210 | bool should_retry = false; | 4210 | bool should_retry = false; |
4211 | int error; | 4211 | int error; |
4212 | 4212 | ||
4213 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 4213 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
4214 | return -EINVAL; | 4214 | return -EINVAL; |
4215 | 4215 | ||
4216 | if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) | 4216 | if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && |
4217 | (flags & RENAME_EXCHANGE)) | ||
4217 | return -EINVAL; | 4218 | return -EINVAL; |
4218 | 4219 | ||
4220 | if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) | ||
4221 | return -EPERM; | ||
4222 | |||
4219 | retry: | 4223 | retry: |
4220 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); | 4224 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); |
4221 | if (IS_ERR(from)) { | 4225 | if (IS_ERR(from)) { |
@@ -4347,6 +4351,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna | |||
4347 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); | 4351 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
4348 | } | 4352 | } |
4349 | 4353 | ||
4354 | int vfs_whiteout(struct inode *dir, struct dentry *dentry) | ||
4355 | { | ||
4356 | int error = may_create(dir, dentry); | ||
4357 | if (error) | ||
4358 | return error; | ||
4359 | |||
4360 | if (!dir->i_op->mknod) | ||
4361 | return -EPERM; | ||
4362 | |||
4363 | return dir->i_op->mknod(dir, dentry, | ||
4364 | S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); | ||
4365 | } | ||
4366 | EXPORT_SYMBOL(vfs_whiteout); | ||
4367 | |||
4350 | int readlink_copy(char __user *buffer, int buflen, const char *link) | 4368 | int readlink_copy(char __user *buffer, int buflen, const char *link) |
4351 | { | 4369 | { |
4352 | int len = PTR_ERR(link); | 4370 | int len = PTR_ERR(link); |
diff --git a/fs/namespace.c b/fs/namespace.c index fbba8b17330d..5b66b2b3624d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1686,6 +1686,33 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
1686 | namespace_unlock(); | 1686 | namespace_unlock(); |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | /** | ||
1690 | * clone_private_mount - create a private clone of a path | ||
1691 | * | ||
1692 | * This creates a new vfsmount, which will be the clone of @path. The new will | ||
1693 | * not be attached anywhere in the namespace and will be private (i.e. changes | ||
1694 | * to the originating mount won't be propagated into this). | ||
1695 | * | ||
1696 | * Release with mntput(). | ||
1697 | */ | ||
1698 | struct vfsmount *clone_private_mount(struct path *path) | ||
1699 | { | ||
1700 | struct mount *old_mnt = real_mount(path->mnt); | ||
1701 | struct mount *new_mnt; | ||
1702 | |||
1703 | if (IS_MNT_UNBINDABLE(old_mnt)) | ||
1704 | return ERR_PTR(-EINVAL); | ||
1705 | |||
1706 | down_read(&namespace_sem); | ||
1707 | new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); | ||
1708 | up_read(&namespace_sem); | ||
1709 | if (IS_ERR(new_mnt)) | ||
1710 | return ERR_CAST(new_mnt); | ||
1711 | |||
1712 | return &new_mnt->mnt; | ||
1713 | } | ||
1714 | EXPORT_SYMBOL_GPL(clone_private_mount); | ||
1715 | |||
1689 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, | 1716 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, |
1690 | struct vfsmount *root) | 1717 | struct vfsmount *root) |
1691 | { | 1718 | { |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 5228f201d3d5..4f46f7a05289 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) | |||
378 | loff_t offset = header->args.offset; | 378 | loff_t offset = header->args.offset; |
379 | size_t count = header->args.count; | 379 | size_t count = header->args.count; |
380 | struct page **pages = header->args.pages; | 380 | struct page **pages = header->args.pages; |
381 | int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; | 381 | int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
382 | unsigned int pg_len; | 382 | unsigned int pg_len; |
383 | struct blk_plug plug; | 383 | struct blk_plug plug; |
384 | int i; | 384 | int i; |
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index e966c023b1b7..acbf9ca4018c 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c | |||
@@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | |||
65 | 65 | ||
66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | 66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); |
67 | 67 | ||
68 | mutex_lock(&nn->bl_mutex); | ||
68 | bl_pipe_msg.bl_wq = &nn->bl_wq; | 69 | bl_pipe_msg.bl_wq = &nn->bl_wq; |
69 | 70 | ||
70 | b->simple.len += 4; /* single volume */ | 71 | b->simple.len += 4; /* single volume */ |
71 | if (b->simple.len > PAGE_SIZE) | 72 | if (b->simple.len > PAGE_SIZE) |
72 | return -EIO; | 73 | goto out_unlock; |
73 | 74 | ||
74 | memset(msg, 0, sizeof(*msg)); | 75 | memset(msg, 0, sizeof(*msg)); |
75 | msg->len = sizeof(*bl_msg) + b->simple.len; | 76 | msg->len = sizeof(*bl_msg) + b->simple.len; |
76 | msg->data = kzalloc(msg->len, gfp_mask); | 77 | msg->data = kzalloc(msg->len, gfp_mask); |
77 | if (!msg->data) | 78 | if (!msg->data) |
78 | goto out; | 79 | goto out_free_data; |
79 | 80 | ||
80 | bl_msg = msg->data; | 81 | bl_msg = msg->data; |
81 | bl_msg->type = BL_DEVICE_MOUNT, | 82 | bl_msg->type = BL_DEVICE_MOUNT, |
@@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | |||
87 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | 88 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); |
88 | if (rc < 0) { | 89 | if (rc < 0) { |
89 | remove_wait_queue(&nn->bl_wq, &wq); | 90 | remove_wait_queue(&nn->bl_wq, &wq); |
90 | goto out; | 91 | goto out_free_data; |
91 | } | 92 | } |
92 | 93 | ||
93 | set_current_state(TASK_UNINTERRUPTIBLE); | 94 | set_current_state(TASK_UNINTERRUPTIBLE); |
@@ -97,12 +98,14 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | |||
97 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | 98 | if (reply->status != BL_DEVICE_REQUEST_PROC) { |
98 | printk(KERN_WARNING "%s failed to decode device: %d\n", | 99 | printk(KERN_WARNING "%s failed to decode device: %d\n", |
99 | __func__, reply->status); | 100 | __func__, reply->status); |
100 | goto out; | 101 | goto out_free_data; |
101 | } | 102 | } |
102 | 103 | ||
103 | dev = MKDEV(reply->major, reply->minor); | 104 | dev = MKDEV(reply->major, reply->minor); |
104 | out: | 105 | out_free_data: |
105 | kfree(msg->data); | 106 | kfree(msg->data); |
107 | out_unlock: | ||
108 | mutex_unlock(&nn->bl_mutex); | ||
106 | return dev; | 109 | return dev; |
107 | } | 110 | } |
108 | 111 | ||
@@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net) | |||
232 | struct nfs_net *nn = net_generic(net, nfs_net_id); | 235 | struct nfs_net *nn = net_generic(net, nfs_net_id); |
233 | struct dentry *dentry; | 236 | struct dentry *dentry; |
234 | 237 | ||
238 | mutex_init(&nn->bl_mutex); | ||
235 | init_waitqueue_head(&nn->bl_wq); | 239 | init_waitqueue_head(&nn->bl_wq); |
236 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | 240 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); |
237 | if (IS_ERR(nn->bl_device_pipe)) | 241 | if (IS_ERR(nn->bl_device_pipe)) |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5853f53db732..7f3f60641344 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -125,6 +125,8 @@ again: | |||
125 | continue; | 125 | continue; |
126 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) | 126 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) |
127 | continue; | 127 | continue; |
128 | if (!nfs4_valid_open_stateid(state)) | ||
129 | continue; | ||
128 | if (!nfs4_stateid_match(&state->stateid, stateid)) | 130 | if (!nfs4_stateid_match(&state->stateid, stateid)) |
129 | continue; | 131 | continue; |
130 | get_nfs_open_context(ctx); | 132 | get_nfs_open_context(ctx); |
@@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * | |||
193 | { | 195 | { |
194 | int res = 0; | 196 | int res = 0; |
195 | 197 | ||
196 | res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); | 198 | if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) |
199 | res = nfs4_proc_delegreturn(inode, | ||
200 | delegation->cred, | ||
201 | &delegation->stateid, | ||
202 | issync); | ||
197 | nfs_free_delegation(delegation); | 203 | nfs_free_delegation(delegation); |
198 | return res; | 204 | return res; |
199 | } | 205 | } |
@@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation | |||
380 | { | 386 | { |
381 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 387 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; |
382 | struct nfs_inode *nfsi = NFS_I(inode); | 388 | struct nfs_inode *nfsi = NFS_I(inode); |
383 | int err; | 389 | int err = 0; |
384 | 390 | ||
385 | if (delegation == NULL) | 391 | if (delegation == NULL) |
386 | return 0; | 392 | return 0; |
387 | do { | 393 | do { |
394 | if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) | ||
395 | break; | ||
388 | err = nfs_delegation_claim_opens(inode, &delegation->stateid); | 396 | err = nfs_delegation_claim_opens(inode, &delegation->stateid); |
389 | if (!issync || err != -EAGAIN) | 397 | if (!issync || err != -EAGAIN) |
390 | break; | 398 | break; |
@@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl | |||
605 | rcu_read_unlock(); | 613 | rcu_read_unlock(); |
606 | } | 614 | } |
607 | 615 | ||
616 | static void nfs_revoke_delegation(struct inode *inode) | ||
617 | { | ||
618 | struct nfs_delegation *delegation; | ||
619 | rcu_read_lock(); | ||
620 | delegation = rcu_dereference(NFS_I(inode)->delegation); | ||
621 | if (delegation != NULL) { | ||
622 | set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); | ||
623 | nfs_mark_return_delegation(NFS_SERVER(inode), delegation); | ||
624 | } | ||
625 | rcu_read_unlock(); | ||
626 | } | ||
627 | |||
608 | void nfs_remove_bad_delegation(struct inode *inode) | 628 | void nfs_remove_bad_delegation(struct inode *inode) |
609 | { | 629 | { |
610 | struct nfs_delegation *delegation; | 630 | struct nfs_delegation *delegation; |
611 | 631 | ||
632 | nfs_revoke_delegation(inode); | ||
612 | delegation = nfs_inode_detach_delegation(inode); | 633 | delegation = nfs_inode_detach_delegation(inode); |
613 | if (delegation) { | 634 | if (delegation) { |
614 | nfs_inode_find_state_and_recover(inode, &delegation->stateid); | 635 | nfs_inode_find_state_and_recover(inode, &delegation->stateid); |
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5c1cce39297f..e3c20a3ccc93 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h | |||
@@ -31,6 +31,7 @@ enum { | |||
31 | NFS_DELEGATION_RETURN_IF_CLOSED, | 31 | NFS_DELEGATION_RETURN_IF_CLOSED, |
32 | NFS_DELEGATION_REFERENCED, | 32 | NFS_DELEGATION_REFERENCED, |
33 | NFS_DELEGATION_RETURNING, | 33 | NFS_DELEGATION_RETURNING, |
34 | NFS_DELEGATION_REVOKED, | ||
34 | }; | 35 | }; |
35 | 36 | ||
36 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); | 37 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 06e8cfcbb670..6e62155abf26 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |||
1527 | case -ENOENT: | 1527 | case -ENOENT: |
1528 | d_drop(dentry); | 1528 | d_drop(dentry); |
1529 | d_add(dentry, NULL); | 1529 | d_add(dentry, NULL); |
1530 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1530 | break; | 1531 | break; |
1531 | case -EISDIR: | 1532 | case -EISDIR: |
1532 | case -ENOTDIR: | 1533 | case -ENOTDIR: |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 20cffc830468..10bf07280f4a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref) | |||
266 | { | 266 | { |
267 | struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); | 267 | struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); |
268 | 268 | ||
269 | nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); | ||
269 | if (dreq->l_ctx != NULL) | 270 | if (dreq->l_ctx != NULL) |
270 | nfs_put_lock_context(dreq->l_ctx); | 271 | nfs_put_lock_context(dreq->l_ctx); |
271 | if (dreq->ctx != NULL) | 272 | if (dreq->ctx != NULL) |
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 46fab1cb455a..7afb52f6a25a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
@@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
145 | case -NFS4ERR_DELEG_REVOKED: | 145 | case -NFS4ERR_DELEG_REVOKED: |
146 | case -NFS4ERR_ADMIN_REVOKED: | 146 | case -NFS4ERR_ADMIN_REVOKED: |
147 | case -NFS4ERR_BAD_STATEID: | 147 | case -NFS4ERR_BAD_STATEID: |
148 | if (state == NULL) | ||
149 | break; | ||
150 | nfs_remove_bad_delegation(state->inode); | ||
151 | case -NFS4ERR_OPENMODE: | 148 | case -NFS4ERR_OPENMODE: |
152 | if (state == NULL) | 149 | if (state == NULL) |
153 | break; | 150 | break; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6388a59f2add..00689a8a85e4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -626,7 +626,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
626 | { | 626 | { |
627 | struct inode *inode = dentry->d_inode; | 627 | struct inode *inode = dentry->d_inode; |
628 | int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; | 628 | int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; |
629 | int err; | 629 | int err = 0; |
630 | 630 | ||
631 | trace_nfs_getattr_enter(inode); | 631 | trace_nfs_getattr_enter(inode); |
632 | /* Flush out writes to the server in order to update c/mtime. */ | 632 | /* Flush out writes to the server in order to update c/mtime. */ |
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index ef221fb8a183..f0e06e4acbef 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h | |||
@@ -19,6 +19,7 @@ struct nfs_net { | |||
19 | struct rpc_pipe *bl_device_pipe; | 19 | struct rpc_pipe *bl_device_pipe; |
20 | struct bl_dev_msg bl_mount_reply; | 20 | struct bl_dev_msg bl_mount_reply; |
21 | wait_queue_head_t bl_wq; | 21 | wait_queue_head_t bl_wq; |
22 | struct mutex bl_mutex; | ||
22 | struct list_head nfs_client_list; | 23 | struct list_head nfs_client_list; |
23 | struct list_head nfs_volume_list; | 24 | struct list_head nfs_volume_list; |
24 | #if IS_ENABLED(CONFIG_NFS_V4) | 25 | #if IS_ENABLED(CONFIG_NFS_V4) |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 405bd95c1f58..69dc20a743f9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -370,11 +370,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc | |||
370 | case -NFS4ERR_DELEG_REVOKED: | 370 | case -NFS4ERR_DELEG_REVOKED: |
371 | case -NFS4ERR_ADMIN_REVOKED: | 371 | case -NFS4ERR_ADMIN_REVOKED: |
372 | case -NFS4ERR_BAD_STATEID: | 372 | case -NFS4ERR_BAD_STATEID: |
373 | if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { | ||
374 | nfs_remove_bad_delegation(inode); | ||
375 | exception->retry = 1; | ||
376 | break; | ||
377 | } | ||
378 | if (state == NULL) | 373 | if (state == NULL) |
379 | break; | 374 | break; |
380 | ret = nfs4_schedule_stateid_recovery(server, state); | 375 | ret = nfs4_schedule_stateid_recovery(server, state); |
@@ -1654,7 +1649,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct | |||
1654 | nfs_inode_find_state_and_recover(state->inode, | 1649 | nfs_inode_find_state_and_recover(state->inode, |
1655 | stateid); | 1650 | stateid); |
1656 | nfs4_schedule_stateid_recovery(server, state); | 1651 | nfs4_schedule_stateid_recovery(server, state); |
1657 | return 0; | 1652 | return -EAGAIN; |
1658 | case -NFS4ERR_DELAY: | 1653 | case -NFS4ERR_DELAY: |
1659 | case -NFS4ERR_GRACE: | 1654 | case -NFS4ERR_GRACE: |
1660 | set_bit(NFS_DELEGATED_STATE, &state->flags); | 1655 | set_bit(NFS_DELEGATED_STATE, &state->flags); |
@@ -2109,46 +2104,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta | |||
2109 | return ret; | 2104 | return ret; |
2110 | } | 2105 | } |
2111 | 2106 | ||
2107 | static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) | ||
2108 | { | ||
2109 | nfs_remove_bad_delegation(state->inode); | ||
2110 | write_seqlock(&state->seqlock); | ||
2111 | nfs4_stateid_copy(&state->stateid, &state->open_stateid); | ||
2112 | write_sequnlock(&state->seqlock); | ||
2113 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | ||
2114 | } | ||
2115 | |||
2116 | static void nfs40_clear_delegation_stateid(struct nfs4_state *state) | ||
2117 | { | ||
2118 | if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) | ||
2119 | nfs_finish_clear_delegation_stateid(state); | ||
2120 | } | ||
2121 | |||
2122 | static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) | ||
2123 | { | ||
2124 | /* NFSv4.0 doesn't allow for delegation recovery on open expire */ | ||
2125 | nfs40_clear_delegation_stateid(state); | ||
2126 | return nfs4_open_expired(sp, state); | ||
2127 | } | ||
2128 | |||
2112 | #if defined(CONFIG_NFS_V4_1) | 2129 | #if defined(CONFIG_NFS_V4_1) |
2113 | static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | 2130 | static void nfs41_check_delegation_stateid(struct nfs4_state *state) |
2114 | { | 2131 | { |
2115 | struct nfs_server *server = NFS_SERVER(state->inode); | 2132 | struct nfs_server *server = NFS_SERVER(state->inode); |
2116 | nfs4_stateid *stateid = &state->stateid; | 2133 | nfs4_stateid stateid; |
2117 | struct nfs_delegation *delegation; | 2134 | struct nfs_delegation *delegation; |
2118 | struct rpc_cred *cred = NULL; | 2135 | struct rpc_cred *cred; |
2119 | int status = -NFS4ERR_BAD_STATEID; | 2136 | int status; |
2120 | |||
2121 | /* If a state reset has been done, test_stateid is unneeded */ | ||
2122 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | ||
2123 | return; | ||
2124 | 2137 | ||
2125 | /* Get the delegation credential for use by test/free_stateid */ | 2138 | /* Get the delegation credential for use by test/free_stateid */ |
2126 | rcu_read_lock(); | 2139 | rcu_read_lock(); |
2127 | delegation = rcu_dereference(NFS_I(state->inode)->delegation); | 2140 | delegation = rcu_dereference(NFS_I(state->inode)->delegation); |
2128 | if (delegation != NULL && | 2141 | if (delegation == NULL) { |
2129 | nfs4_stateid_match(&delegation->stateid, stateid)) { | ||
2130 | cred = get_rpccred(delegation->cred); | ||
2131 | rcu_read_unlock(); | ||
2132 | status = nfs41_test_stateid(server, stateid, cred); | ||
2133 | trace_nfs4_test_delegation_stateid(state, NULL, status); | ||
2134 | } else | ||
2135 | rcu_read_unlock(); | 2142 | rcu_read_unlock(); |
2143 | return; | ||
2144 | } | ||
2145 | |||
2146 | nfs4_stateid_copy(&stateid, &delegation->stateid); | ||
2147 | cred = get_rpccred(delegation->cred); | ||
2148 | rcu_read_unlock(); | ||
2149 | status = nfs41_test_stateid(server, &stateid, cred); | ||
2150 | trace_nfs4_test_delegation_stateid(state, NULL, status); | ||
2136 | 2151 | ||
2137 | if (status != NFS_OK) { | 2152 | if (status != NFS_OK) { |
2138 | /* Free the stateid unless the server explicitly | 2153 | /* Free the stateid unless the server explicitly |
2139 | * informs us the stateid is unrecognized. */ | 2154 | * informs us the stateid is unrecognized. */ |
2140 | if (status != -NFS4ERR_BAD_STATEID) | 2155 | if (status != -NFS4ERR_BAD_STATEID) |
2141 | nfs41_free_stateid(server, stateid, cred); | 2156 | nfs41_free_stateid(server, &stateid, cred); |
2142 | nfs_remove_bad_delegation(state->inode); | 2157 | nfs_finish_clear_delegation_stateid(state); |
2143 | |||
2144 | write_seqlock(&state->seqlock); | ||
2145 | nfs4_stateid_copy(&state->stateid, &state->open_stateid); | ||
2146 | write_sequnlock(&state->seqlock); | ||
2147 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | ||
2148 | } | 2158 | } |
2149 | 2159 | ||
2150 | if (cred != NULL) | 2160 | put_rpccred(cred); |
2151 | put_rpccred(cred); | ||
2152 | } | 2161 | } |
2153 | 2162 | ||
2154 | /** | 2163 | /** |
@@ -2192,7 +2201,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st | |||
2192 | { | 2201 | { |
2193 | int status; | 2202 | int status; |
2194 | 2203 | ||
2195 | nfs41_clear_delegation_stateid(state); | 2204 | nfs41_check_delegation_stateid(state); |
2196 | status = nfs41_check_open_stateid(state); | 2205 | status = nfs41_check_open_stateid(state); |
2197 | if (status != NFS_OK) | 2206 | if (status != NFS_OK) |
2198 | status = nfs4_open_expired(sp, state); | 2207 | status = nfs4_open_expired(sp, state); |
@@ -2231,19 +2240,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |||
2231 | seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | 2240 | seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); |
2232 | 2241 | ||
2233 | ret = _nfs4_proc_open(opendata); | 2242 | ret = _nfs4_proc_open(opendata); |
2234 | if (ret != 0) { | 2243 | if (ret != 0) |
2235 | if (ret == -ENOENT) { | ||
2236 | dentry = opendata->dentry; | ||
2237 | if (dentry->d_inode) | ||
2238 | d_delete(dentry); | ||
2239 | else if (d_unhashed(dentry)) | ||
2240 | d_add(dentry, NULL); | ||
2241 | |||
2242 | nfs_set_verifier(dentry, | ||
2243 | nfs_save_change_attribute(opendata->dir->d_inode)); | ||
2244 | } | ||
2245 | goto out; | 2244 | goto out; |
2246 | } | ||
2247 | 2245 | ||
2248 | state = nfs4_opendata_to_nfs4_state(opendata); | 2246 | state = nfs4_opendata_to_nfs4_state(opendata); |
2249 | ret = PTR_ERR(state); | 2247 | ret = PTR_ERR(state); |
@@ -4841,9 +4839,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
4841 | case -NFS4ERR_DELEG_REVOKED: | 4839 | case -NFS4ERR_DELEG_REVOKED: |
4842 | case -NFS4ERR_ADMIN_REVOKED: | 4840 | case -NFS4ERR_ADMIN_REVOKED: |
4843 | case -NFS4ERR_BAD_STATEID: | 4841 | case -NFS4ERR_BAD_STATEID: |
4844 | if (state == NULL) | ||
4845 | break; | ||
4846 | nfs_remove_bad_delegation(state->inode); | ||
4847 | case -NFS4ERR_OPENMODE: | 4842 | case -NFS4ERR_OPENMODE: |
4848 | if (state == NULL) | 4843 | if (state == NULL) |
4849 | break; | 4844 | break; |
@@ -8341,7 +8336,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { | |||
8341 | static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { | 8336 | static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { |
8342 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, | 8337 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, |
8343 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, | 8338 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, |
8344 | .recover_open = nfs4_open_expired, | 8339 | .recover_open = nfs40_open_expired, |
8345 | .recover_lock = nfs4_lock_expired, | 8340 | .recover_lock = nfs4_lock_expired, |
8346 | .establish_clid = nfs4_init_clientid, | 8341 | .establish_clid = nfs4_init_clientid, |
8347 | }; | 8342 | }; |
@@ -8408,8 +8403,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | |||
8408 | | NFS_CAP_CHANGE_ATTR | 8403 | | NFS_CAP_CHANGE_ATTR |
8409 | | NFS_CAP_POSIX_LOCK | 8404 | | NFS_CAP_POSIX_LOCK |
8410 | | NFS_CAP_STATEID_NFSV41 | 8405 | | NFS_CAP_STATEID_NFSV41 |
8411 | | NFS_CAP_ATOMIC_OPEN_V1 | 8406 | | NFS_CAP_ATOMIC_OPEN_V1, |
8412 | | NFS_CAP_SEEK, | ||
8413 | .init_client = nfs41_init_client, | 8407 | .init_client = nfs41_init_client, |
8414 | .shutdown_client = nfs41_shutdown_client, | 8408 | .shutdown_client = nfs41_shutdown_client, |
8415 | .match_stateid = nfs41_match_stateid, | 8409 | .match_stateid = nfs41_match_stateid, |
@@ -8431,7 +8425,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | |||
8431 | | NFS_CAP_CHANGE_ATTR | 8425 | | NFS_CAP_CHANGE_ATTR |
8432 | | NFS_CAP_POSIX_LOCK | 8426 | | NFS_CAP_POSIX_LOCK |
8433 | | NFS_CAP_STATEID_NFSV41 | 8427 | | NFS_CAP_STATEID_NFSV41 |
8434 | | NFS_CAP_ATOMIC_OPEN_V1, | 8428 | | NFS_CAP_ATOMIC_OPEN_V1 |
8429 | | NFS_CAP_SEEK, | ||
8435 | .init_client = nfs41_init_client, | 8430 | .init_client = nfs41_init_client, |
8436 | .shutdown_client = nfs41_shutdown_client, | 8431 | .shutdown_client = nfs41_shutdown_client, |
8437 | .match_stateid = nfs41_match_stateid, | 8432 | .match_stateid = nfs41_match_stateid, |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c6e4bda63000..9e5bc42180e4 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * All rights reserved. | 5 | * All rights reserved. |
6 | * | 6 | * |
7 | * Benny Halevy <bhalevy@panasas.com> | 7 | * Benny Halevy <bhalevy@panasas.com> |
8 | * Boaz Harrosh <bharrosh@panasas.com> | 8 | * Boaz Harrosh <ooo@electrozaur.com> |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2 | 11 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index c89357c7a914..919efd4a1a23 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * All rights reserved. | 5 | * All rights reserved. |
6 | * | 6 | * |
7 | * Benny Halevy <bhalevy@panasas.com> | 7 | * Benny Halevy <bhalevy@panasas.com> |
8 | * Boaz Harrosh <bharrosh@panasas.com> | 8 | * Boaz Harrosh <ooo@electrozaur.com> |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2 | 11 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 3a0828d57339..2641dbad345c 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -6,7 +6,7 @@ | |||
6 | * All rights reserved. | 6 | * All rights reserved. |
7 | * | 7 | * |
8 | * Benny Halevy <bhalevy@panasas.com> | 8 | * Benny Halevy <bhalevy@panasas.com> |
9 | * Boaz Harrosh <bharrosh@panasas.com> | 9 | * Boaz Harrosh <ooo@electrozaur.com> |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or modify | 11 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License version 2 | 12 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index b3918f7ac34d..f093c7ec983b 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * All rights reserved. | 5 | * All rights reserved. |
6 | * | 6 | * |
7 | * Benny Halevy <bhalevy@panasas.com> | 7 | * Benny Halevy <bhalevy@panasas.com> |
8 | * Boaz Harrosh <bharrosh@panasas.com> | 8 | * Boaz Harrosh <ooo@electrozaur.com> |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2 | 11 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 12493846a2d3..f83b02dc9166 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -715,8 +715,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
715 | 715 | ||
716 | if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) | 716 | if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) |
717 | nfs_release_request(req); | 717 | nfs_release_request(req); |
718 | else | ||
719 | WARN_ON_ONCE(1); | ||
720 | } | 718 | } |
721 | 719 | ||
722 | static void | 720 | static void |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 747f3b95bd11..33a46a8dfaf7 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -335,12 +335,15 @@ void nfsd_lockd_shutdown(void); | |||
335 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | 335 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) |
336 | 336 | ||
337 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 337 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
338 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ | 338 | #define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL |
339 | (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) | ||
340 | #else | 339 | #else |
341 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 | 340 | #define NFSD4_2_SECURITY_ATTRS 0 |
342 | #endif | 341 | #endif |
343 | 342 | ||
343 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ | ||
344 | (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ | ||
345 | NFSD4_2_SECURITY_ATTRS) | ||
346 | |||
344 | static inline u32 nfsd_suppattrs0(u32 minorversion) | 347 | static inline u32 nfsd_suppattrs0(u32 minorversion) |
345 | { | 348 | { |
346 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 | 349 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9d3e9c50066a..89326acd4561 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
229 | &fsnotify_mark_srcu); | 229 | &fsnotify_mark_srcu); |
230 | } | 230 | } |
231 | 231 | ||
232 | /* | ||
233 | * We need to merge inode & vfsmount mark lists so that inode mark | ||
234 | * ignore masks are properly reflected for mount mark notifications. | ||
235 | * That's why this traversal is so complicated... | ||
236 | */ | ||
232 | while (inode_node || vfsmount_node) { | 237 | while (inode_node || vfsmount_node) { |
233 | inode_group = vfsmount_group = NULL; | 238 | inode_group = NULL; |
239 | inode_mark = NULL; | ||
240 | vfsmount_group = NULL; | ||
241 | vfsmount_mark = NULL; | ||
234 | 242 | ||
235 | if (inode_node) { | 243 | if (inode_node) { |
236 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), | 244 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), |
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
244 | vfsmount_group = vfsmount_mark->group; | 252 | vfsmount_group = vfsmount_mark->group; |
245 | } | 253 | } |
246 | 254 | ||
247 | if (inode_group > vfsmount_group) { | 255 | if (inode_group && vfsmount_group) { |
248 | /* handle inode */ | 256 | int cmp = fsnotify_compare_groups(inode_group, |
249 | ret = send_to_group(to_tell, inode_mark, NULL, mask, | 257 | vfsmount_group); |
250 | data, data_is, cookie, file_name); | 258 | if (cmp > 0) { |
251 | /* we didn't use the vfsmount_mark */ | 259 | inode_group = NULL; |
252 | vfsmount_group = NULL; | 260 | inode_mark = NULL; |
253 | } else if (vfsmount_group > inode_group) { | 261 | } else if (cmp < 0) { |
254 | ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, | 262 | vfsmount_group = NULL; |
255 | data, data_is, cookie, file_name); | 263 | vfsmount_mark = NULL; |
256 | inode_group = NULL; | 264 | } |
257 | } else { | ||
258 | ret = send_to_group(to_tell, inode_mark, vfsmount_mark, | ||
259 | mask, data, data_is, cookie, | ||
260 | file_name); | ||
261 | } | 265 | } |
266 | ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, | ||
267 | data, data_is, cookie, file_name); | ||
262 | 268 | ||
263 | if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) | 269 | if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) |
264 | goto out; | 270 | goto out; |
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 9c0898c4cfe1..3b68b0ae0a97 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h | |||
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group); | |||
12 | /* protects reads of inode and vfsmount marks list */ | 12 | /* protects reads of inode and vfsmount marks list */ |
13 | extern struct srcu_struct fsnotify_mark_srcu; | 13 | extern struct srcu_struct fsnotify_mark_srcu; |
14 | 14 | ||
15 | /* compare two groups for sorting of marks lists */ | ||
16 | extern int fsnotify_compare_groups(struct fsnotify_group *a, | ||
17 | struct fsnotify_group *b); | ||
18 | |||
15 | extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, | 19 | extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, |
16 | __u32 mask); | 20 | __u32 mask); |
17 | /* add a mark to an inode */ | 21 | /* add a mark to an inode */ |
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 9ce062218de9..dfbf5447eea4 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, | |||
194 | { | 194 | { |
195 | struct fsnotify_mark *lmark, *last = NULL; | 195 | struct fsnotify_mark *lmark, *last = NULL; |
196 | int ret = 0; | 196 | int ret = 0; |
197 | int cmp; | ||
197 | 198 | ||
198 | mark->flags |= FSNOTIFY_MARK_FLAG_INODE; | 199 | mark->flags |= FSNOTIFY_MARK_FLAG_INODE; |
199 | 200 | ||
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, | |||
219 | goto out; | 220 | goto out; |
220 | } | 221 | } |
221 | 222 | ||
222 | if (mark->group->priority < lmark->group->priority) | 223 | cmp = fsnotify_compare_groups(lmark->group, mark->group); |
223 | continue; | 224 | if (cmp < 0) |
224 | |||
225 | if ((mark->group->priority == lmark->group->priority) && | ||
226 | (mark->group < lmark->group)) | ||
227 | continue; | 225 | continue; |
228 | 226 | ||
229 | hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); | 227 | hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); |
@@ -288,20 +286,25 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
288 | spin_unlock(&inode->i_lock); | 286 | spin_unlock(&inode->i_lock); |
289 | 287 | ||
290 | /* In case the dropping of a reference would nuke next_i. */ | 288 | /* In case the dropping of a reference would nuke next_i. */ |
291 | if ((&next_i->i_sb_list != list) && | 289 | while (&next_i->i_sb_list != list) { |
292 | atomic_read(&next_i->i_count)) { | ||
293 | spin_lock(&next_i->i_lock); | 290 | spin_lock(&next_i->i_lock); |
294 | if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { | 291 | if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && |
292 | atomic_read(&next_i->i_count)) { | ||
295 | __iget(next_i); | 293 | __iget(next_i); |
296 | need_iput = next_i; | 294 | need_iput = next_i; |
295 | spin_unlock(&next_i->i_lock); | ||
296 | break; | ||
297 | } | 297 | } |
298 | spin_unlock(&next_i->i_lock); | 298 | spin_unlock(&next_i->i_lock); |
299 | next_i = list_entry(next_i->i_sb_list.next, | ||
300 | struct inode, i_sb_list); | ||
299 | } | 301 | } |
300 | 302 | ||
301 | /* | 303 | /* |
302 | * We can safely drop inode_sb_list_lock here because we hold | 304 | * We can safely drop inode_sb_list_lock here because either |
303 | * references on both inode and next_i. Also no new inodes | 305 | * we actually hold references on both inode and next_i or |
304 | * will be added since the umount has begun. | 306 | * end of list. Also no new inodes will be added since the |
307 | * umount has begun. | ||
305 | */ | 308 | */ |
306 | spin_unlock(&inode_sb_list_lock); | 309 | spin_unlock(&inode_sb_list_lock); |
307 | 310 | ||
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d90deaa08e78..34c38fabf514 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas | |||
210 | } | 210 | } |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * Sorting function for lists of fsnotify marks. | ||
214 | * | ||
215 | * Fanotify supports different notification classes (reflected as priority of | ||
216 | * notification group). Events shall be passed to notification groups in | ||
217 | * decreasing priority order. To achieve this marks in notification lists for | ||
218 | * inodes and vfsmounts are sorted so that priorities of corresponding groups | ||
219 | * are descending. | ||
220 | * | ||
221 | * Furthermore correct handling of the ignore mask requires processing inode | ||
222 | * and vfsmount marks of each group together. Using the group address as | ||
223 | * further sort criterion provides a unique sorting order and thus we can | ||
224 | * merge inode and vfsmount lists of marks in linear time and find groups | ||
225 | * present in both lists. | ||
226 | * | ||
227 | * A return value of 1 signifies that b has priority over a. | ||
228 | * A return value of 0 signifies that the two marks have to be handled together. | ||
229 | * A return value of -1 signifies that a has priority over b. | ||
230 | */ | ||
231 | int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) | ||
232 | { | ||
233 | if (a == b) | ||
234 | return 0; | ||
235 | if (!a) | ||
236 | return 1; | ||
237 | if (!b) | ||
238 | return -1; | ||
239 | if (a->priority < b->priority) | ||
240 | return 1; | ||
241 | if (a->priority > b->priority) | ||
242 | return -1; | ||
243 | if (a < b) | ||
244 | return 1; | ||
245 | return -1; | ||
246 | } | ||
247 | |||
248 | /* | ||
213 | * Attach an initialized mark to a given group and fs object. | 249 | * Attach an initialized mark to a given group and fs object. |
214 | * These marks may be used for the fsnotify backend to determine which | 250 | * These marks may be used for the fsnotify backend to determine which |
215 | * event types should be delivered to which group. | 251 | * event types should be delivered to which group. |
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index ac851e8376b1..faefa72a11eb 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c | |||
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | |||
153 | struct mount *m = real_mount(mnt); | 153 | struct mount *m = real_mount(mnt); |
154 | struct fsnotify_mark *lmark, *last = NULL; | 154 | struct fsnotify_mark *lmark, *last = NULL; |
155 | int ret = 0; | 155 | int ret = 0; |
156 | int cmp; | ||
156 | 157 | ||
157 | mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; | 158 | mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; |
158 | 159 | ||
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | |||
178 | goto out; | 179 | goto out; |
179 | } | 180 | } |
180 | 181 | ||
181 | if (mark->group->priority < lmark->group->priority) | 182 | cmp = fsnotify_compare_groups(lmark->group, mark->group); |
182 | continue; | 183 | if (cmp < 0) |
183 | |||
184 | if ((mark->group->priority == lmark->group->priority) && | ||
185 | (mark->group < lmark->group)) | ||
186 | continue; | 184 | continue; |
187 | 185 | ||
188 | hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); | 186 | hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 97de0fbd9f78..a96044004064 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, | |||
925 | size_t veclen, size_t total) | 925 | size_t veclen, size_t total) |
926 | { | 926 | { |
927 | int ret; | 927 | int ret; |
928 | struct msghdr msg; | 928 | struct msghdr msg = {.msg_flags = 0,}; |
929 | 929 | ||
930 | if (sock == NULL) { | 930 | if (sock == NULL) { |
931 | ret = -EINVAL; | 931 | ret = -EINVAL; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8add6f1030d7..b931e04e3388 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -158,7 +158,7 @@ bail_add: | |||
158 | * NOTE: This dentry already has ->d_op set from | 158 | * NOTE: This dentry already has ->d_op set from |
159 | * ocfs2_get_parent() and ocfs2_get_dentry() | 159 | * ocfs2_get_parent() and ocfs2_get_dentry() |
160 | */ | 160 | */ |
161 | if (ret) | 161 | if (!IS_ERR_OR_NULL(ret)) |
162 | dentry = ret; | 162 | dentry = ret; |
163 | 163 | ||
164 | status = ocfs2_dentry_attach_lock(dentry, inode, | 164 | status = ocfs2_dentry_attach_lock(dentry, inode, |
@@ -824,8 +824,7 @@ struct file *dentry_open(const struct path *path, int flags, | |||
824 | f = get_empty_filp(); | 824 | f = get_empty_filp(); |
825 | if (!IS_ERR(f)) { | 825 | if (!IS_ERR(f)) { |
826 | f->f_flags = flags; | 826 | f->f_flags = flags; |
827 | f->f_path = *path; | 827 | error = vfs_open(path, f, cred); |
828 | error = do_dentry_open(f, NULL, cred); | ||
829 | if (!error) { | 828 | if (!error) { |
830 | /* from now on we need fput() to dispose of f */ | 829 | /* from now on we need fput() to dispose of f */ |
831 | error = open_check_o_direct(f); | 830 | error = open_check_o_direct(f); |
@@ -842,6 +841,26 @@ struct file *dentry_open(const struct path *path, int flags, | |||
842 | } | 841 | } |
843 | EXPORT_SYMBOL(dentry_open); | 842 | EXPORT_SYMBOL(dentry_open); |
844 | 843 | ||
844 | /** | ||
845 | * vfs_open - open the file at the given path | ||
846 | * @path: path to open | ||
847 | * @filp: newly allocated file with f_flag initialized | ||
848 | * @cred: credentials to use | ||
849 | */ | ||
850 | int vfs_open(const struct path *path, struct file *filp, | ||
851 | const struct cred *cred) | ||
852 | { | ||
853 | struct inode *inode = path->dentry->d_inode; | ||
854 | |||
855 | if (inode->i_op->dentry_open) | ||
856 | return inode->i_op->dentry_open(path->dentry, filp, cred); | ||
857 | else { | ||
858 | filp->f_path = *path; | ||
859 | return do_dentry_open(filp, NULL, cred); | ||
860 | } | ||
861 | } | ||
862 | EXPORT_SYMBOL(vfs_open); | ||
863 | |||
845 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) | 864 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
846 | { | 865 | { |
847 | int lookup_flags = 0; | 866 | int lookup_flags = 0; |
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig new file mode 100644 index 000000000000..e60125976873 --- /dev/null +++ b/fs/overlayfs/Kconfig | |||
@@ -0,0 +1,10 @@ | |||
1 | config OVERLAYFS_FS | ||
2 | tristate "Overlay filesystem support" | ||
3 | help | ||
4 | An overlay filesystem combines two filesystems - an 'upper' filesystem | ||
5 | and a 'lower' filesystem. When a name exists in both filesystems, the | ||
6 | object in the 'upper' filesystem is visible while the object in the | ||
7 | 'lower' filesystem is either hidden or, in the case of directories, | ||
8 | merged with the 'upper' object. | ||
9 | |||
10 | For more information see Documentation/filesystems/overlayfs.txt | ||
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile new file mode 100644 index 000000000000..8f91889480d0 --- /dev/null +++ b/fs/overlayfs/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Makefile for the overlay filesystem. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o | ||
6 | |||
7 | overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o | ||
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c new file mode 100644 index 000000000000..ea10a8719107 --- /dev/null +++ b/fs/overlayfs/copy_up.c | |||
@@ -0,0 +1,414 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/file.h> | ||
13 | #include <linux/splice.h> | ||
14 | #include <linux/xattr.h> | ||
15 | #include <linux/security.h> | ||
16 | #include <linux/uaccess.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/namei.h> | ||
19 | #include "overlayfs.h" | ||
20 | |||
21 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) | ||
22 | |||
23 | int ovl_copy_xattr(struct dentry *old, struct dentry *new) | ||
24 | { | ||
25 | ssize_t list_size, size; | ||
26 | char *buf, *name, *value; | ||
27 | int error; | ||
28 | |||
29 | if (!old->d_inode->i_op->getxattr || | ||
30 | !new->d_inode->i_op->getxattr) | ||
31 | return 0; | ||
32 | |||
33 | list_size = vfs_listxattr(old, NULL, 0); | ||
34 | if (list_size <= 0) { | ||
35 | if (list_size == -EOPNOTSUPP) | ||
36 | return 0; | ||
37 | return list_size; | ||
38 | } | ||
39 | |||
40 | buf = kzalloc(list_size, GFP_KERNEL); | ||
41 | if (!buf) | ||
42 | return -ENOMEM; | ||
43 | |||
44 | error = -ENOMEM; | ||
45 | value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); | ||
46 | if (!value) | ||
47 | goto out; | ||
48 | |||
49 | list_size = vfs_listxattr(old, buf, list_size); | ||
50 | if (list_size <= 0) { | ||
51 | error = list_size; | ||
52 | goto out_free_value; | ||
53 | } | ||
54 | |||
55 | for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { | ||
56 | size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); | ||
57 | if (size <= 0) { | ||
58 | error = size; | ||
59 | goto out_free_value; | ||
60 | } | ||
61 | error = vfs_setxattr(new, name, value, size, 0); | ||
62 | if (error) | ||
63 | goto out_free_value; | ||
64 | } | ||
65 | |||
66 | out_free_value: | ||
67 | kfree(value); | ||
68 | out: | ||
69 | kfree(buf); | ||
70 | return error; | ||
71 | } | ||
72 | |||
73 | static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) | ||
74 | { | ||
75 | struct file *old_file; | ||
76 | struct file *new_file; | ||
77 | loff_t old_pos = 0; | ||
78 | loff_t new_pos = 0; | ||
79 | int error = 0; | ||
80 | |||
81 | if (len == 0) | ||
82 | return 0; | ||
83 | |||
84 | old_file = ovl_path_open(old, O_RDONLY); | ||
85 | if (IS_ERR(old_file)) | ||
86 | return PTR_ERR(old_file); | ||
87 | |||
88 | new_file = ovl_path_open(new, O_WRONLY); | ||
89 | if (IS_ERR(new_file)) { | ||
90 | error = PTR_ERR(new_file); | ||
91 | goto out_fput; | ||
92 | } | ||
93 | |||
94 | /* FIXME: copy up sparse files efficiently */ | ||
95 | while (len) { | ||
96 | size_t this_len = OVL_COPY_UP_CHUNK_SIZE; | ||
97 | long bytes; | ||
98 | |||
99 | if (len < this_len) | ||
100 | this_len = len; | ||
101 | |||
102 | if (signal_pending_state(TASK_KILLABLE, current)) { | ||
103 | error = -EINTR; | ||
104 | break; | ||
105 | } | ||
106 | |||
107 | bytes = do_splice_direct(old_file, &old_pos, | ||
108 | new_file, &new_pos, | ||
109 | this_len, SPLICE_F_MOVE); | ||
110 | if (bytes <= 0) { | ||
111 | error = bytes; | ||
112 | break; | ||
113 | } | ||
114 | WARN_ON(old_pos != new_pos); | ||
115 | |||
116 | len -= bytes; | ||
117 | } | ||
118 | |||
119 | fput(new_file); | ||
120 | out_fput: | ||
121 | fput(old_file); | ||
122 | return error; | ||
123 | } | ||
124 | |||
125 | static char *ovl_read_symlink(struct dentry *realdentry) | ||
126 | { | ||
127 | int res; | ||
128 | char *buf; | ||
129 | struct inode *inode = realdentry->d_inode; | ||
130 | mm_segment_t old_fs; | ||
131 | |||
132 | res = -EINVAL; | ||
133 | if (!inode->i_op->readlink) | ||
134 | goto err; | ||
135 | |||
136 | res = -ENOMEM; | ||
137 | buf = (char *) __get_free_page(GFP_KERNEL); | ||
138 | if (!buf) | ||
139 | goto err; | ||
140 | |||
141 | old_fs = get_fs(); | ||
142 | set_fs(get_ds()); | ||
143 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
144 | res = inode->i_op->readlink(realdentry, | ||
145 | (char __user *)buf, PAGE_SIZE - 1); | ||
146 | set_fs(old_fs); | ||
147 | if (res < 0) { | ||
148 | free_page((unsigned long) buf); | ||
149 | goto err; | ||
150 | } | ||
151 | buf[res] = '\0'; | ||
152 | |||
153 | return buf; | ||
154 | |||
155 | err: | ||
156 | return ERR_PTR(res); | ||
157 | } | ||
158 | |||
159 | static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) | ||
160 | { | ||
161 | struct iattr attr = { | ||
162 | .ia_valid = | ||
163 | ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, | ||
164 | .ia_atime = stat->atime, | ||
165 | .ia_mtime = stat->mtime, | ||
166 | }; | ||
167 | |||
168 | return notify_change(upperdentry, &attr, NULL); | ||
169 | } | ||
170 | |||
171 | int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) | ||
172 | { | ||
173 | int err = 0; | ||
174 | |||
175 | if (!S_ISLNK(stat->mode)) { | ||
176 | struct iattr attr = { | ||
177 | .ia_valid = ATTR_MODE, | ||
178 | .ia_mode = stat->mode, | ||
179 | }; | ||
180 | err = notify_change(upperdentry, &attr, NULL); | ||
181 | } | ||
182 | if (!err) { | ||
183 | struct iattr attr = { | ||
184 | .ia_valid = ATTR_UID | ATTR_GID, | ||
185 | .ia_uid = stat->uid, | ||
186 | .ia_gid = stat->gid, | ||
187 | }; | ||
188 | err = notify_change(upperdentry, &attr, NULL); | ||
189 | } | ||
190 | if (!err) | ||
191 | ovl_set_timestamps(upperdentry, stat); | ||
192 | |||
193 | return err; | ||
194 | |||
195 | } | ||
196 | |||
197 | static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, | ||
198 | struct dentry *dentry, struct path *lowerpath, | ||
199 | struct kstat *stat, struct iattr *attr, | ||
200 | const char *link) | ||
201 | { | ||
202 | struct inode *wdir = workdir->d_inode; | ||
203 | struct inode *udir = upperdir->d_inode; | ||
204 | struct dentry *newdentry = NULL; | ||
205 | struct dentry *upper = NULL; | ||
206 | umode_t mode = stat->mode; | ||
207 | int err; | ||
208 | |||
209 | newdentry = ovl_lookup_temp(workdir, dentry); | ||
210 | err = PTR_ERR(newdentry); | ||
211 | if (IS_ERR(newdentry)) | ||
212 | goto out; | ||
213 | |||
214 | upper = lookup_one_len(dentry->d_name.name, upperdir, | ||
215 | dentry->d_name.len); | ||
216 | err = PTR_ERR(upper); | ||
217 | if (IS_ERR(upper)) | ||
218 | goto out1; | ||
219 | |||
220 | /* Can't properly set mode on creation because of the umask */ | ||
221 | stat->mode &= S_IFMT; | ||
222 | err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); | ||
223 | stat->mode = mode; | ||
224 | if (err) | ||
225 | goto out2; | ||
226 | |||
227 | if (S_ISREG(stat->mode)) { | ||
228 | struct path upperpath; | ||
229 | ovl_path_upper(dentry, &upperpath); | ||
230 | BUG_ON(upperpath.dentry != NULL); | ||
231 | upperpath.dentry = newdentry; | ||
232 | |||
233 | err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); | ||
234 | if (err) | ||
235 | goto out_cleanup; | ||
236 | } | ||
237 | |||
238 | err = ovl_copy_xattr(lowerpath->dentry, newdentry); | ||
239 | if (err) | ||
240 | goto out_cleanup; | ||
241 | |||
242 | mutex_lock(&newdentry->d_inode->i_mutex); | ||
243 | err = ovl_set_attr(newdentry, stat); | ||
244 | if (!err && attr) | ||
245 | err = notify_change(newdentry, attr, NULL); | ||
246 | mutex_unlock(&newdentry->d_inode->i_mutex); | ||
247 | if (err) | ||
248 | goto out_cleanup; | ||
249 | |||
250 | err = ovl_do_rename(wdir, newdentry, udir, upper, 0); | ||
251 | if (err) | ||
252 | goto out_cleanup; | ||
253 | |||
254 | ovl_dentry_update(dentry, newdentry); | ||
255 | newdentry = NULL; | ||
256 | |||
257 | /* | ||
258 | * Non-directores become opaque when copied up. | ||
259 | */ | ||
260 | if (!S_ISDIR(stat->mode)) | ||
261 | ovl_dentry_set_opaque(dentry, true); | ||
262 | out2: | ||
263 | dput(upper); | ||
264 | out1: | ||
265 | dput(newdentry); | ||
266 | out: | ||
267 | return err; | ||
268 | |||
269 | out_cleanup: | ||
270 | ovl_cleanup(wdir, newdentry); | ||
271 | goto out; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Copy up a single dentry | ||
276 | * | ||
277 | * Directory renames only allowed on "pure upper" (already created on | ||
278 | * upper filesystem, never copied up). Directories which are on lower or | ||
279 | * are merged may not be renamed. For these -EXDEV is returned and | ||
280 | * userspace has to deal with it. This means, when copying up a | ||
281 | * directory we can rely on it and ancestors being stable. | ||
282 | * | ||
283 | * Non-directory renames start with copy up of source if necessary. The | ||
284 | * actual rename will only proceed once the copy up was successful. Copy | ||
285 | * up uses upper parent i_mutex for exclusion. Since rename can change | ||
286 | * d_parent it is possible that the copy up will lock the old parent. At | ||
287 | * that point the file will have already been copied up anyway. | ||
288 | */ | ||
289 | int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | ||
290 | struct path *lowerpath, struct kstat *stat, | ||
291 | struct iattr *attr) | ||
292 | { | ||
293 | struct dentry *workdir = ovl_workdir(dentry); | ||
294 | int err; | ||
295 | struct kstat pstat; | ||
296 | struct path parentpath; | ||
297 | struct dentry *upperdir; | ||
298 | struct dentry *upperdentry; | ||
299 | const struct cred *old_cred; | ||
300 | struct cred *override_cred; | ||
301 | char *link = NULL; | ||
302 | |||
303 | ovl_path_upper(parent, &parentpath); | ||
304 | upperdir = parentpath.dentry; | ||
305 | |||
306 | err = vfs_getattr(&parentpath, &pstat); | ||
307 | if (err) | ||
308 | return err; | ||
309 | |||
310 | if (S_ISLNK(stat->mode)) { | ||
311 | link = ovl_read_symlink(lowerpath->dentry); | ||
312 | if (IS_ERR(link)) | ||
313 | return PTR_ERR(link); | ||
314 | } | ||
315 | |||
316 | err = -ENOMEM; | ||
317 | override_cred = prepare_creds(); | ||
318 | if (!override_cred) | ||
319 | goto out_free_link; | ||
320 | |||
321 | override_cred->fsuid = stat->uid; | ||
322 | override_cred->fsgid = stat->gid; | ||
323 | /* | ||
324 | * CAP_SYS_ADMIN for copying up extended attributes | ||
325 | * CAP_DAC_OVERRIDE for create | ||
326 | * CAP_FOWNER for chmod, timestamp update | ||
327 | * CAP_FSETID for chmod | ||
328 | * CAP_CHOWN for chown | ||
329 | * CAP_MKNOD for mknod | ||
330 | */ | ||
331 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
332 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
333 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
334 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
335 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
336 | cap_raise(override_cred->cap_effective, CAP_MKNOD); | ||
337 | old_cred = override_creds(override_cred); | ||
338 | |||
339 | err = -EIO; | ||
340 | if (lock_rename(workdir, upperdir) != NULL) { | ||
341 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | ||
342 | goto out_unlock; | ||
343 | } | ||
344 | upperdentry = ovl_dentry_upper(dentry); | ||
345 | if (upperdentry) { | ||
346 | unlock_rename(workdir, upperdir); | ||
347 | err = 0; | ||
348 | /* Raced with another copy-up? Do the setattr here */ | ||
349 | if (attr) { | ||
350 | mutex_lock(&upperdentry->d_inode->i_mutex); | ||
351 | err = notify_change(upperdentry, attr, NULL); | ||
352 | mutex_unlock(&upperdentry->d_inode->i_mutex); | ||
353 | } | ||
354 | goto out_put_cred; | ||
355 | } | ||
356 | |||
357 | err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, | ||
358 | stat, attr, link); | ||
359 | if (!err) { | ||
360 | /* Restore timestamps on parent (best effort) */ | ||
361 | ovl_set_timestamps(upperdir, &pstat); | ||
362 | } | ||
363 | out_unlock: | ||
364 | unlock_rename(workdir, upperdir); | ||
365 | out_put_cred: | ||
366 | revert_creds(old_cred); | ||
367 | put_cred(override_cred); | ||
368 | |||
369 | out_free_link: | ||
370 | if (link) | ||
371 | free_page((unsigned long) link); | ||
372 | |||
373 | return err; | ||
374 | } | ||
375 | |||
376 | int ovl_copy_up(struct dentry *dentry) | ||
377 | { | ||
378 | int err; | ||
379 | |||
380 | err = 0; | ||
381 | while (!err) { | ||
382 | struct dentry *next; | ||
383 | struct dentry *parent; | ||
384 | struct path lowerpath; | ||
385 | struct kstat stat; | ||
386 | enum ovl_path_type type = ovl_path_type(dentry); | ||
387 | |||
388 | if (type != OVL_PATH_LOWER) | ||
389 | break; | ||
390 | |||
391 | next = dget(dentry); | ||
392 | /* find the topmost dentry not yet copied up */ | ||
393 | for (;;) { | ||
394 | parent = dget_parent(next); | ||
395 | |||
396 | type = ovl_path_type(parent); | ||
397 | if (type != OVL_PATH_LOWER) | ||
398 | break; | ||
399 | |||
400 | dput(next); | ||
401 | next = parent; | ||
402 | } | ||
403 | |||
404 | ovl_path_lower(next, &lowerpath); | ||
405 | err = vfs_getattr(&lowerpath, &stat); | ||
406 | if (!err) | ||
407 | err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); | ||
408 | |||
409 | dput(parent); | ||
410 | dput(next); | ||
411 | } | ||
412 | |||
413 | return err; | ||
414 | } | ||
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c new file mode 100644 index 000000000000..15cd91ad9940 --- /dev/null +++ b/fs/overlayfs/dir.c | |||
@@ -0,0 +1,921 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/namei.h> | ||
12 | #include <linux/xattr.h> | ||
13 | #include <linux/security.h> | ||
14 | #include <linux/cred.h> | ||
15 | #include "overlayfs.h" | ||
16 | |||
17 | void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) | ||
18 | { | ||
19 | int err; | ||
20 | |||
21 | dget(wdentry); | ||
22 | if (S_ISDIR(wdentry->d_inode->i_mode)) | ||
23 | err = ovl_do_rmdir(wdir, wdentry); | ||
24 | else | ||
25 | err = ovl_do_unlink(wdir, wdentry); | ||
26 | dput(wdentry); | ||
27 | |||
28 | if (err) { | ||
29 | pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", | ||
30 | wdentry, err); | ||
31 | } | ||
32 | } | ||
33 | |||
34 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) | ||
35 | { | ||
36 | struct dentry *temp; | ||
37 | char name[20]; | ||
38 | |||
39 | snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry); | ||
40 | |||
41 | temp = lookup_one_len(name, workdir, strlen(name)); | ||
42 | if (!IS_ERR(temp) && temp->d_inode) { | ||
43 | pr_err("overlayfs: workdir/%s already exists\n", name); | ||
44 | dput(temp); | ||
45 | temp = ERR_PTR(-EIO); | ||
46 | } | ||
47 | |||
48 | return temp; | ||
49 | } | ||
50 | |||
51 | /* caller holds i_mutex on workdir */ | ||
52 | static struct dentry *ovl_whiteout(struct dentry *workdir, | ||
53 | struct dentry *dentry) | ||
54 | { | ||
55 | int err; | ||
56 | struct dentry *whiteout; | ||
57 | struct inode *wdir = workdir->d_inode; | ||
58 | |||
59 | whiteout = ovl_lookup_temp(workdir, dentry); | ||
60 | if (IS_ERR(whiteout)) | ||
61 | return whiteout; | ||
62 | |||
63 | err = ovl_do_whiteout(wdir, whiteout); | ||
64 | if (err) { | ||
65 | dput(whiteout); | ||
66 | whiteout = ERR_PTR(err); | ||
67 | } | ||
68 | |||
69 | return whiteout; | ||
70 | } | ||
71 | |||
/*
 * Create an object in the upper filesystem.  Dispatches on
 * @stat->mode's file type, or creates a hard link to @hardlink when
 * one is given.  @link is the symlink target (S_IFLNK only); @debug
 * selects the logging ovl_do_* wrappers.
 *
 * Caller holds i_mutex on @dir.  Returns 0 or a negative errno;
 * -ESTALE if @newdentry is unexpectedly already positive.
 */
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
		    struct kstat *stat, const char *link,
		    struct dentry *hardlink, bool debug)
{
	int err;

	if (newdentry->d_inode)
		return -ESTALE;

	if (hardlink) {
		err = ovl_do_link(hardlink, dir, newdentry, debug);
	} else {
		switch (stat->mode & S_IFMT) {
		case S_IFREG:
			err = ovl_do_create(dir, newdentry, stat->mode, debug);
			break;

		case S_IFDIR:
			err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
			break;

		case S_IFCHR:
		case S_IFBLK:
		case S_IFIFO:
		case S_IFSOCK:
			err = ovl_do_mknod(dir, newdentry,
					   stat->mode, stat->rdev, debug);
			break;

		case S_IFLNK:
			err = ovl_do_symlink(dir, newdentry, link, debug);
			break;

		default:
			err = -EPERM;
		}
	}
	if (!err && WARN_ON(!newdentry->d_inode)) {
		/*
		 * Not quite sure if non-instantiated dentry is legal or not.
		 * VFS doesn't seem to care so check and warn here.
		 */
		err = -ENOENT;
	}
	return err;
}
118 | |||
119 | static int ovl_set_opaque(struct dentry *upperdentry) | ||
120 | { | ||
121 | return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); | ||
122 | } | ||
123 | |||
124 | static void ovl_remove_opaque(struct dentry *upperdentry) | ||
125 | { | ||
126 | int err; | ||
127 | |||
128 | err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); | ||
129 | if (err) { | ||
130 | pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", | ||
131 | upperdentry->d_name.name, err); | ||
132 | } | ||
133 | } | ||
134 | |||
135 | static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
136 | struct kstat *stat) | ||
137 | { | ||
138 | int err; | ||
139 | enum ovl_path_type type; | ||
140 | struct path realpath; | ||
141 | |||
142 | type = ovl_path_real(dentry, &realpath); | ||
143 | err = vfs_getattr(&realpath, stat); | ||
144 | if (err) | ||
145 | return err; | ||
146 | |||
147 | stat->dev = dentry->d_sb->s_dev; | ||
148 | stat->ino = dentry->d_inode->i_ino; | ||
149 | |||
150 | /* | ||
151 | * It's probably not worth it to count subdirs to get the | ||
152 | * correct link count. nlink=1 seems to pacify 'find' and | ||
153 | * other utilities. | ||
154 | */ | ||
155 | if (type == OVL_PATH_MERGE) | ||
156 | stat->nlink = 1; | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
/*
 * Create an object directly in the parent's upper directory (the
 * simple case: no whiteout is being replaced).  On success the new
 * upper dentry is attached to the overlay dentry and @inode is
 * instantiated with the new object's attributes.
 *
 * @inode ownership passes to the dcache on success (via
 * d_instantiate); on failure the caller still owns it.
 */
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
			    struct kstat *stat, const char *link,
			    struct dentry *hardlink)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *newdentry;
	int err;

	mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
				   dentry->d_name.len);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_unlock;
	err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
	if (err)
		goto out_dput;

	ovl_dentry_version_inc(dentry->d_parent);
	ovl_dentry_update(dentry, newdentry);
	ovl_copyattr(newdentry->d_inode, inode);
	d_instantiate(dentry, inode);
	/* newdentry reference now held by the overlay dentry */
	newdentry = NULL;
out_dput:
	dput(newdentry);
out_unlock:
	mutex_unlock(&udir->i_mutex);
	return err;
}
191 | |||
192 | static int ovl_lock_rename_workdir(struct dentry *workdir, | ||
193 | struct dentry *upperdir) | ||
194 | { | ||
195 | /* Workdir should not be the same as upperdir */ | ||
196 | if (workdir == upperdir) | ||
197 | goto err; | ||
198 | |||
199 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
200 | if (lock_rename(workdir, upperdir) != NULL) | ||
201 | goto err_unlock; | ||
202 | |||
203 | return 0; | ||
204 | |||
205 | err_unlock: | ||
206 | unlock_rename(workdir, upperdir); | ||
207 | err: | ||
208 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | ||
209 | return -EIO; | ||
210 | } | ||
211 | |||
/*
 * Replace a merged directory (upper dir + whiteouts covering lower
 * entries) with a fresh opaque directory, so it can subsequently be
 * treated as pure-upper.  A new directory is built in the workdir with
 * copied attributes/xattrs and the opaque marker, then atomically
 * exchanged with the old upper directory; the old one and the listed
 * whiteouts are cleaned up afterwards.
 *
 * @list: previously collected whiteout entries of the directory.
 * Returns the new opaque directory dentry, or ERR_PTR on failure.
 */
static struct dentry *ovl_clear_empty(struct dentry *dentry,
				      struct list_head *list)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir = workdir->d_inode;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct path upperpath;
	struct dentry *upper;
	struct dentry *opaquedir;
	struct kstat stat;
	int err;

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out;

	ovl_path_upper(dentry, &upperpath);
	err = vfs_getattr(&upperpath, &stat);
	if (err)
		goto out_unlock;

	/* Re-check under lock that the upper is still a dir in upperdir */
	err = -ESTALE;
	if (!S_ISDIR(stat.mode))
		goto out_unlock;
	upper = upperpath.dentry;
	if (upper->d_parent->d_inode != udir)
		goto out_unlock;

	opaquedir = ovl_lookup_temp(workdir, dentry);
	err = PTR_ERR(opaquedir);
	if (IS_ERR(opaquedir))
		goto out_unlock;

	err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
	if (err)
		goto out_dput;

	err = ovl_copy_xattr(upper, opaquedir);
	if (err)
		goto out_cleanup;

	err = ovl_set_opaque(opaquedir);
	if (err)
		goto out_cleanup;

	mutex_lock(&opaquedir->d_inode->i_mutex);
	err = ovl_set_attr(opaquedir, &stat);
	mutex_unlock(&opaquedir->d_inode->i_mutex);
	if (err)
		goto out_cleanup;

	/* Atomically swap the new opaque dir into place */
	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
	if (err)
		goto out_cleanup;

	/* Old dir (now in workdir) and its whiteouts are no longer needed */
	ovl_cleanup_whiteouts(upper, list);
	ovl_cleanup(wdir, upper);
	unlock_rename(workdir, upperdir);

	/* dentry's upper doesn't match now, get rid of it */
	d_drop(dentry);

	return opaquedir;

out_cleanup:
	ovl_cleanup(wdir, opaquedir);
out_dput:
	dput(opaquedir);
out_unlock:
	unlock_rename(workdir, upperdir);
out:
	return ERR_PTR(err);
}
286 | |||
287 | static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, | ||
288 | enum ovl_path_type type) | ||
289 | { | ||
290 | int err; | ||
291 | struct dentry *ret = NULL; | ||
292 | LIST_HEAD(list); | ||
293 | |||
294 | err = ovl_check_empty_dir(dentry, &list); | ||
295 | if (err) | ||
296 | ret = ERR_PTR(err); | ||
297 | else if (type == OVL_PATH_MERGE) | ||
298 | ret = ovl_clear_empty(dentry, &list); | ||
299 | |||
300 | ovl_cache_free(&list); | ||
301 | |||
302 | return ret; | ||
303 | } | ||
304 | |||
/*
 * Create an object in place of an existing whiteout.  The object is
 * first built under a temporary name in the workdir, then renamed over
 * the whiteout.  New directories additionally get the opaque marker
 * (so stale lower entries stay hidden) and use RENAME_EXCHANGE, after
 * which the displaced whiteout is cleaned up.
 *
 * On success @inode is instantiated and its reference passes to the
 * dcache; on failure the caller still owns it.
 */
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
				    struct kstat *stat, const char *link,
				    struct dentry *hardlink)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir = workdir->d_inode;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *upper;
	struct dentry *newdentry;
	int err;

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out;

	newdentry = ovl_lookup_temp(workdir, dentry);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_unlock;

	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_dput;

	err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
	if (err)
		goto out_dput2;

	if (S_ISDIR(stat->mode)) {
		/* Opaque before it becomes visible in the upper dir */
		err = ovl_set_opaque(newdentry);
		if (err)
			goto out_cleanup;

		err = ovl_do_rename(wdir, newdentry, udir, upper,
				    RENAME_EXCHANGE);
		if (err)
			goto out_cleanup;

		/* The whiteout ended up in the workdir: remove it */
		ovl_cleanup(wdir, upper);
	} else {
		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
		if (err)
			goto out_cleanup;
	}
	ovl_dentry_version_inc(dentry->d_parent);
	ovl_dentry_update(dentry, newdentry);
	ovl_copyattr(newdentry->d_inode, inode);
	d_instantiate(dentry, inode);
	/* reference now held by the overlay dentry */
	newdentry = NULL;
out_dput2:
	dput(upper);
out_dput:
	dput(newdentry);
out_unlock:
	unlock_rename(workdir, upperdir);
out:
	return err;

out_cleanup:
	ovl_cleanup(wdir, newdentry);
	goto out_dput2;
}
370 | |||
/*
 * Common create path for create/mkdir/mknod/symlink/link.  Copies up
 * the parent, then creates either directly in the upper dir or, when
 * the overlay dentry is opaque (covers a whiteout), via the workdir
 * with elevated capabilities.
 *
 * @hardlink: upper dentry to link to, or NULL for a fresh object.
 * The overlay inode allocated here is released on failure.
 */
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
			      const char *link, struct dentry *hardlink)
{
	int err;
	struct inode *inode;
	struct kstat stat = {
		.mode = mode,
		.rdev = rdev,
	};

	err = -ENOMEM;
	inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
	if (!inode)
		goto out;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_iput;

	if (!ovl_dentry_is_opaque(dentry)) {
		err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
	} else {
		const struct cred *old_cred;
		struct cred *override_cred;

		err = -ENOMEM;
		override_cred = prepare_creds();
		if (!override_cred)
			goto out_iput;

		/*
		 * CAP_SYS_ADMIN for setting opaque xattr
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 */
		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(override_cred->cap_effective, CAP_FOWNER);
		old_cred = override_creds(override_cred);

		err = ovl_create_over_whiteout(dentry, inode, &stat, link,
					       hardlink);

		revert_creds(old_cred);
		put_cred(override_cred);
	}

	/* On success the inode reference was consumed by d_instantiate */
	if (!err)
		inode = NULL;
out_iput:
	iput(inode);
out:
	return err;
}
425 | |||
426 | static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, | ||
427 | const char *link) | ||
428 | { | ||
429 | int err; | ||
430 | |||
431 | err = ovl_want_write(dentry); | ||
432 | if (!err) { | ||
433 | err = ovl_create_or_link(dentry, mode, rdev, link, NULL); | ||
434 | ovl_drop_write(dentry); | ||
435 | } | ||
436 | |||
437 | return err; | ||
438 | } | ||
439 | |||
440 | static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
441 | bool excl) | ||
442 | { | ||
443 | return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); | ||
444 | } | ||
445 | |||
446 | static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
447 | { | ||
448 | return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); | ||
449 | } | ||
450 | |||
451 | static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | ||
452 | dev_t rdev) | ||
453 | { | ||
454 | /* Don't allow creation of "whiteout" on overlay */ | ||
455 | if (S_ISCHR(mode) && rdev == WHITEOUT_DEV) | ||
456 | return -EPERM; | ||
457 | |||
458 | return ovl_create_object(dentry, mode, rdev, NULL); | ||
459 | } | ||
460 | |||
461 | static int ovl_symlink(struct inode *dir, struct dentry *dentry, | ||
462 | const char *link) | ||
463 | { | ||
464 | return ovl_create_object(dentry, S_IFLNK, 0, link); | ||
465 | } | ||
466 | |||
/*
 * ->link: hard link @old to @new.  The source must first be copied up
 * so there is an upper inode to link to; the link itself then goes
 * through the common create path.
 */
static int ovl_link(struct dentry *old, struct inode *newdir,
		    struct dentry *new)
{
	int err;
	struct dentry *upper;

	err = ovl_want_write(old);
	if (err)
		goto out;

	err = ovl_copy_up(old);
	if (err)
		goto out_drop_write;

	upper = ovl_dentry_upper(old);
	err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);

out_drop_write:
	ovl_drop_write(old);
out:
	return err;
}
489 | |||
/*
 * Remove an object that (also) exists in a lower layer: a whiteout must
 * be left behind so the lower object stays hidden.  The whiteout is
 * created in the workdir and renamed into place; for a pure-lower
 * object it simply moves in, while an upper object is replaced (with
 * RENAME_EXCHANGE for directories, whose old copy is then removed).
 *
 * @type: the overlay path type observed by the caller.
 */
static int ovl_remove_and_whiteout(struct dentry *dentry,
				   enum ovl_path_type type, bool is_dir)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir = workdir->d_inode;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *whiteout;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;

	if (is_dir) {
		/* May replace a merged dir with an opaque one first */
		opaquedir = ovl_check_empty_and_clear(dentry, type);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir))
			goto out;
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out_dput;

	whiteout = ovl_whiteout(workdir, dentry);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto out_unlock;

	if (type == OVL_PATH_LOWER) {
		/* No upper object: just move the whiteout into place */
		upper = lookup_one_len(dentry->d_name.name, upperdir,
				       dentry->d_name.len);
		err = PTR_ERR(upper);
		if (IS_ERR(upper))
			goto kill_whiteout;

		err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
		dput(upper);
		if (err)
			goto kill_whiteout;
	} else {
		int flags = 0;

		upper = ovl_dentry_upper(dentry);
		if (opaquedir)
			upper = opaquedir;
		err = -ESTALE;
		if (upper->d_parent != upperdir)
			goto kill_whiteout;

		/* Directories can't be overwritten by rename: exchange */
		if (is_dir)
			flags |= RENAME_EXCHANGE;

		err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
		if (err)
			goto kill_whiteout;

		/* Exchanged dir is now in the workdir: remove it */
		if (is_dir)
			ovl_cleanup(wdir, upper);
	}
	ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
	d_drop(dentry);
	dput(whiteout);
out_unlock:
	unlock_rename(workdir, upperdir);
out_dput:
	dput(opaquedir);
out:
	return err;

kill_whiteout:
	ovl_cleanup(wdir, whiteout);
	goto out_d_drop;
}
564 | |||
/*
 * Remove a pure-upper object: no lower counterpart exists, so a plain
 * rmdir/unlink in the upper directory suffices (no whiteout needed).
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *dir = upperdir->d_inode;
	struct dentry *upper = ovl_dentry_upper(dentry);
	int err;

	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	/* -ESTALE if the upper dentry moved out from under us */
	err = -ESTALE;
	if (upper->d_parent == upperdir) {
		/* Don't let d_delete() think it can reset d_inode */
		dget(upper);
		if (is_dir)
			err = vfs_rmdir(dir, upper);
		else
			err = vfs_unlink(dir, upper, NULL);
		dput(upper);
		ovl_dentry_version_inc(dentry->d_parent);
	}

	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	d_drop(dentry);
	mutex_unlock(&dir->i_mutex);

	return err;
}
596 | |||
597 | static inline int ovl_check_sticky(struct dentry *dentry) | ||
598 | { | ||
599 | struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode; | ||
600 | struct inode *inode = ovl_dentry_real(dentry)->d_inode; | ||
601 | |||
602 | if (check_sticky(dir, inode)) | ||
603 | return -EPERM; | ||
604 | |||
605 | return 0; | ||
606 | } | ||
607 | |||
/*
 * Common unlink/rmdir implementation.  After copying up the parent,
 * a pure-upper object is removed directly; anything with a lower
 * counterpart is whited out under elevated capabilities.
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
	enum ovl_path_type type;
	int err;

	err = ovl_check_sticky(dentry);
	if (err)
		goto out;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_drop_write;

	type = ovl_path_type(dentry);
	if (type == OVL_PATH_PURE_UPPER) {
		err = ovl_remove_upper(dentry, is_dir);
	} else {
		const struct cred *old_cred;
		struct cred *override_cred;

		err = -ENOMEM;
		override_cred = prepare_creds();
		if (!override_cred)
			goto out_drop_write;

		/*
		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 * CAP_FSETID for chmod of opaque dir
		 * CAP_CHOWN for chown of opaque dir
		 */
		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(override_cred->cap_effective, CAP_FOWNER);
		cap_raise(override_cred->cap_effective, CAP_FSETID);
		cap_raise(override_cred->cap_effective, CAP_CHOWN);
		old_cred = override_creds(override_cred);

		err = ovl_remove_and_whiteout(dentry, type, is_dir);

		revert_creds(old_cred);
		put_cred(override_cred);
	}
out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}
661 | |||
662 | static int ovl_unlink(struct inode *dir, struct dentry *dentry) | ||
663 | { | ||
664 | return ovl_do_remove(dentry, false); | ||
665 | } | ||
666 | |||
667 | static int ovl_rmdir(struct inode *dir, struct dentry *dentry) | ||
668 | { | ||
669 | return ovl_do_remove(dentry, true); | ||
670 | } | ||
671 | |||
/*
 * ->rename2 for overlay directories.  Supports RENAME_EXCHANGE and
 * RENAME_NOREPLACE; refuses (-EXDEV) to move or exchange directory
 * trees that still live in a lower layer, since that would require a
 * recursive copy-up.  After copying up source (and, for exchange, the
 * target), the rename is performed on the upper filesystem, converting
 * between whiteouts/opaque markers as needed so overlay visibility is
 * preserved.  Non-trivial cases run with elevated capabilities.
 */
static int ovl_rename2(struct inode *olddir, struct dentry *old,
		       struct inode *newdir, struct dentry *new,
		       unsigned int flags)
{
	int err;
	enum ovl_path_type old_type;
	enum ovl_path_type new_type;
	struct dentry *old_upperdir;
	struct dentry *new_upperdir;
	struct dentry *olddentry;
	struct dentry *newdentry;
	struct dentry *trap;
	bool old_opaque;
	bool new_opaque;
	bool new_create = false;
	bool cleanup_whiteout = false;
	bool overwrite = !(flags & RENAME_EXCHANGE);
	bool is_dir = S_ISDIR(old->d_inode->i_mode);
	bool new_is_dir = false;
	struct dentry *opaquedir = NULL;
	const struct cred *old_cred = NULL;
	struct cred *override_cred = NULL;

	err = -EINVAL;
	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
		goto out;

	/* NOREPLACE was already enforced by the VFS lookup */
	flags &= ~RENAME_NOREPLACE;

	err = ovl_check_sticky(old);
	if (err)
		goto out;

	/* Don't copy up directory trees */
	old_type = ovl_path_type(old);
	err = -EXDEV;
	if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir)
		goto out;

	if (new->d_inode) {
		err = ovl_check_sticky(new);
		if (err)
			goto out;

		if (S_ISDIR(new->d_inode->i_mode))
			new_is_dir = true;

		/* Exchanging a lower/merged dir would also need copy-up */
		new_type = ovl_path_type(new);
		err = -EXDEV;
		if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir)
			goto out;

		/* Renaming an object onto itself is a no-op */
		err = 0;
		if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
			if (ovl_dentry_lower(old)->d_inode ==
			    ovl_dentry_lower(new)->d_inode)
				goto out;
		}
		if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
			if (ovl_dentry_upper(old)->d_inode ==
			    ovl_dentry_upper(new)->d_inode)
				goto out;
		}
	} else {
		/* Negative target: type is derived from its opaque flag */
		if (ovl_dentry_is_opaque(new))
			new_type = OVL_PATH_UPPER;
		else
			new_type = OVL_PATH_PURE_UPPER;
	}

	err = ovl_want_write(old);
	if (err)
		goto out;

	err = ovl_copy_up(old);
	if (err)
		goto out_drop_write;

	err = ovl_copy_up(new->d_parent);
	if (err)
		goto out_drop_write;
	if (!overwrite) {
		err = ovl_copy_up(new);
		if (err)
			goto out_drop_write;
	}

	/* "Opaque" here: not pure-upper, i.e. something lower is involved */
	old_opaque = old_type != OVL_PATH_PURE_UPPER;
	new_opaque = new_type != OVL_PATH_PURE_UPPER;

	if (old_opaque || new_opaque) {
		err = -ENOMEM;
		override_cred = prepare_creds();
		if (!override_cred)
			goto out_drop_write;

		/*
		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
		 * CAP_DAC_OVERRIDE for create in workdir
		 * CAP_FOWNER for removing whiteout from sticky dir
		 * CAP_FSETID for chmod of opaque dir
		 * CAP_CHOWN for chown of opaque dir
		 */
		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(override_cred->cap_effective, CAP_FOWNER);
		cap_raise(override_cred->cap_effective, CAP_FSETID);
		cap_raise(override_cred->cap_effective, CAP_CHOWN);
		old_cred = override_creds(override_cred);
	}

	/* Overwriting a merged dir: must be empty, converted to opaque */
	if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) {
		opaquedir = ovl_check_empty_and_clear(new, new_type);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir)) {
			opaquedir = NULL;
			goto out_revert_creds;
		}
	}

	if (overwrite) {
		if (old_opaque) {
			if (new->d_inode || !new_opaque) {
				/* Whiteout source */
				flags |= RENAME_WHITEOUT;
			} else {
				/* Switch whiteouts */
				flags |= RENAME_EXCHANGE;
			}
		} else if (is_dir && !new->d_inode && new_opaque) {
			/* Swap dir with target's whiteout, then remove it */
			flags |= RENAME_EXCHANGE;
			cleanup_whiteout = true;
		}
	}

	old_upperdir = ovl_dentry_upper(old->d_parent);
	new_upperdir = ovl_dentry_upper(new->d_parent);

	trap = lock_rename(new_upperdir, old_upperdir);

	olddentry = ovl_dentry_upper(old);
	newdentry = ovl_dentry_upper(new);
	if (newdentry) {
		if (opaquedir) {
			/* Transfer the opaquedir reference to newdentry */
			newdentry = opaquedir;
			opaquedir = NULL;
		} else {
			dget(newdentry);
		}
	} else {
		new_create = true;
		newdentry = lookup_one_len(new->d_name.name, new_upperdir,
					   new->d_name.len);
		err = PTR_ERR(newdentry);
		if (IS_ERR(newdentry))
			goto out_unlock;
	}

	/* Re-validate topology under the rename lock */
	err = -ESTALE;
	if (olddentry->d_parent != old_upperdir)
		goto out_dput;
	if (newdentry->d_parent != new_upperdir)
		goto out_dput;
	if (olddentry == trap)
		goto out_dput;
	if (newdentry == trap)
		goto out_dput;

	/* Set opaque markers needed at the destination before renaming */
	if (is_dir && !old_opaque && new_opaque) {
		err = ovl_set_opaque(olddentry);
		if (err)
			goto out_dput;
	}
	if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
		err = ovl_set_opaque(newdentry);
		if (err)
			goto out_dput;
	}

	if (old_opaque || new_opaque) {
		err = ovl_do_rename(old_upperdir->d_inode, olddentry,
				    new_upperdir->d_inode, newdentry,
				    flags);
	} else {
		/* No debug for the plain case */
		BUG_ON(flags & ~RENAME_EXCHANGE);
		err = vfs_rename(old_upperdir->d_inode, olddentry,
				 new_upperdir->d_inode, newdentry,
				 NULL, flags);
	}

	if (err) {
		/* Undo the pre-rename opaque markers */
		if (is_dir && !old_opaque && new_opaque)
			ovl_remove_opaque(olddentry);
		if (!overwrite && new_is_dir && old_opaque && !new_opaque)
			ovl_remove_opaque(newdentry);
		goto out_dput;
	}

	/* Drop markers that are no longer needed at the new locations */
	if (is_dir && old_opaque && !new_opaque)
		ovl_remove_opaque(olddentry);
	if (!overwrite && new_is_dir && !old_opaque && new_opaque)
		ovl_remove_opaque(newdentry);

	if (old_opaque != new_opaque) {
		ovl_dentry_set_opaque(old, new_opaque);
		if (!overwrite)
			ovl_dentry_set_opaque(new, old_opaque);
	}

	/* The exchanged whiteout ended up in the old dir: remove it */
	if (cleanup_whiteout)
		ovl_cleanup(old_upperdir->d_inode, newdentry);

	ovl_dentry_version_inc(old->d_parent);
	ovl_dentry_version_inc(new->d_parent);

out_dput:
	dput(newdentry);
out_unlock:
	unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
	if (old_opaque || new_opaque) {
		revert_creds(old_cred);
		put_cred(override_cred);
	}
out_drop_write:
	ovl_drop_write(old);
out:
	dput(opaquedir);
	return err;
}
903 | |||
/* Inode operations for overlay directories */
const struct inode_operations ovl_dir_inode_operations = {
	.lookup		= ovl_lookup,
	.mkdir		= ovl_mkdir,
	.symlink	= ovl_symlink,
	.unlink		= ovl_unlink,
	.rmdir		= ovl_rmdir,
	.rename2	= ovl_rename2,
	.link		= ovl_link,
	.setattr	= ovl_setattr,
	.create		= ovl_create,
	.mknod		= ovl_mknod,
	.permission	= ovl_permission,
	.getattr	= ovl_dir_getattr,
	.setxattr	= ovl_setxattr,
	.getxattr	= ovl_getxattr,
	.listxattr	= ovl_listxattr,
	.removexattr	= ovl_removexattr,
};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c new file mode 100644 index 000000000000..af2d18c9fcee --- /dev/null +++ b/fs/overlayfs/inode.c | |||
@@ -0,0 +1,425 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/xattr.h> | ||
13 | #include "overlayfs.h" | ||
14 | |||
/*
 * Copy up @dentry itself after making sure its parent chain is copied up.
 * @attr:    attributes to apply to the new upper copy (may be NULL)
 * @no_data: if true, create the upper copy with size 0 (used when the
 *           caller will truncate anyway, e.g. open with O_TRUNC)
 *
 * Returns 0 on success or a negative errno.
 */
static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
			    bool no_data)
{
	int err;
	struct dentry *parent;
	struct kstat stat;
	struct path lowerpath;

	/* Parent must exist in the upper layer before the child can */
	parent = dget_parent(dentry);
	err = ovl_copy_up(parent);
	if (err)
		goto out_dput_parent;

	/* Stat the lower object; its attributes seed the upper copy */
	ovl_path_lower(dentry, &lowerpath);
	err = vfs_getattr(&lowerpath, &stat);
	if (err)
		goto out_dput_parent;

	if (no_data)
		stat.size = 0;

	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);

out_dput_parent:
	dput(parent);
	return err;
}
42 | |||
/*
 * ovl_setattr - change attributes of an overlay object
 *
 * If the object already has an upper copy, apply the change directly via
 * notify_change() under the upper inode's i_mutex.  Otherwise copy the
 * object up, passing @attr so the attributes are applied as part of the
 * copy-up itself.  Returns 0 or a negative errno.
 */
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
	int err;
	struct dentry *upperdentry;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	upperdentry = ovl_dentry_upper(dentry);
	if (upperdentry) {
		mutex_lock(&upperdentry->d_inode->i_mutex);
		err = notify_change(upperdentry, attr, NULL);
		mutex_unlock(&upperdentry->d_inode->i_mutex);
	} else {
		/* Lower-only object: copy up and apply attrs in one go */
		err = ovl_copy_up_last(dentry, attr, false);
	}
	ovl_drop_write(dentry);
out:
	return err;
}
64 | |||
65 | static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
66 | struct kstat *stat) | ||
67 | { | ||
68 | struct path realpath; | ||
69 | |||
70 | ovl_path_real(dentry, &realpath); | ||
71 | return vfs_getattr(&realpath, stat); | ||
72 | } | ||
73 | |||
/*
 * ovl_permission - permission check against the real (backing) inode
 *
 * Directories keep their ovl_entry in i_private, so the check can run
 * even in RCU-walk mode.  Non-directories must find a dentry alias to
 * reach the ovl_entry; that can block, so RCU walk returns -ECHILD.
 *
 * MAY_WRITE on a lower object is allowed despite a read-only lower fs
 * (writes trigger copy-up), but an upper fs that went read-only after
 * mount yields -EROFS.  Returns 0 if access is permitted.
 */
int ovl_permission(struct inode *inode, int mask)
{
	struct ovl_entry *oe;
	struct dentry *alias = NULL;
	struct inode *realinode;
	struct dentry *realdentry;
	bool is_upper;
	int err;

	if (S_ISDIR(inode->i_mode)) {
		oe = inode->i_private;
	} else if (mask & MAY_NOT_BLOCK) {
		return -ECHILD;
	} else {
		/*
		 * For non-directories find an alias and get the info
		 * from there.
		 */
		alias = d_find_any_alias(inode);
		if (WARN_ON(!alias))
			return -ENOENT;

		oe = alias->d_fsdata;
	}

	realdentry = ovl_entry_real(oe, &is_upper);

	/* Careful in RCU walk mode */
	realinode = ACCESS_ONCE(realdentry->d_inode);
	if (!realinode) {
		WARN_ON(!(mask & MAY_NOT_BLOCK));
		err = -ENOENT;
		goto out_dput;
	}

	if (mask & MAY_WRITE) {
		umode_t mode = realinode->i_mode;

		/*
		 * Writes will always be redirected to upper layer, so
		 * ignore lower layer being read-only.
		 *
		 * If the overlay itself is read-only then proceed
		 * with the permission check, don't return EROFS.
		 * This will only happen if this is the lower layer of
		 * another overlayfs.
		 *
		 * If upper fs becomes read-only after the overlay was
		 * constructed return EROFS to prevent modification of
		 * upper layer.
		 */
		err = -EROFS;
		if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
			goto out_dput;
	}

	err = __inode_permission(realinode, mask);
out_dput:
	/* alias is NULL for directories; dput(NULL) is a no-op */
	dput(alias);
	return err;
}
136 | |||
137 | |||
/*
 * Cookie passed from ovl_follow_link() to ovl_put_link(): remembers the
 * real dentry and the real filesystem's follow_link cookie so put_link
 * can be forwarded correctly.
 */
struct ovl_link_data {
	struct dentry *realdentry;	/* backing dentry of the symlink */
	void *cookie;			/* real fs's follow_link() result */
};
142 | |||
/*
 * ovl_follow_link - forward symlink traversal to the backing inode
 *
 * If the real fs has a put_link method, wrap its cookie in an
 * ovl_link_data so ovl_put_link() can forward the call; otherwise
 * return NULL (nothing to release).  Errors are returned as ERR_PTR.
 */
static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	void *ret;
	struct dentry *realdentry;
	struct inode *realinode;

	realdentry = ovl_dentry_real(dentry);
	realinode = realdentry->d_inode;

	if (WARN_ON(!realinode->i_op->follow_link))
		return ERR_PTR(-EPERM);

	ret = realinode->i_op->follow_link(realdentry, nd);
	if (IS_ERR(ret))
		return ret;

	if (realinode->i_op->put_link) {
		struct ovl_link_data *data;

		data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
		if (!data) {
			/* Undo follow_link before failing */
			realinode->i_op->put_link(realdentry, nd, ret);
			return ERR_PTR(-ENOMEM);
		}
		data->realdentry = realdentry;
		data->cookie = ret;

		return data;
	} else {
		return NULL;
	}
}
175 | |||
176 | static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) | ||
177 | { | ||
178 | struct inode *realinode; | ||
179 | struct ovl_link_data *data = c; | ||
180 | |||
181 | if (!data) | ||
182 | return; | ||
183 | |||
184 | realinode = data->realdentry->d_inode; | ||
185 | realinode->i_op->put_link(data->realdentry, nd, data->cookie); | ||
186 | kfree(data); | ||
187 | } | ||
188 | |||
189 | static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | ||
190 | { | ||
191 | struct path realpath; | ||
192 | struct inode *realinode; | ||
193 | |||
194 | ovl_path_real(dentry, &realpath); | ||
195 | realinode = realpath.dentry->d_inode; | ||
196 | |||
197 | if (!realinode->i_op->readlink) | ||
198 | return -EINVAL; | ||
199 | |||
200 | touch_atime(&realpath); | ||
201 | |||
202 | return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); | ||
203 | } | ||
204 | |||
205 | |||
/*
 * Return true if @name lies in the "trusted.overlay." namespace, which
 * overlayfs reserves for its own bookkeeping and hides from userspace.
 */
static bool ovl_is_private_xattr(const char *name)
{
	/*
	 * Compare the full prefix.  The previous hard-coded length of 14
	 * only covered "trusted.overla", so unrelated attributes such as
	 * "trusted.overlayfoo" were wrongly treated as private.
	 */
	return strncmp(name, "trusted.overlay.",
		       sizeof("trusted.overlay.") - 1) == 0;
}
210 | |||
/*
 * ovl_setxattr - set an extended attribute on an overlay object
 *
 * Userspace may not touch the private "trusted.overlay.*" namespace
 * (-EPERM).  The object is copied up first so the attribute always
 * lands on the upper copy.  Returns 0 or a negative errno.
 */
int ovl_setxattr(struct dentry *dentry, const char *name,
		 const void *value, size_t size, int flags)
{
	int err;
	struct dentry *upperdentry;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = -EPERM;
	if (ovl_is_private_xattr(name))
		goto out_drop_write;

	err = ovl_copy_up(dentry);
	if (err)
		goto out_drop_write;

	upperdentry = ovl_dentry_upper(dentry);
	err = vfs_setxattr(upperdentry, name, value, size, flags);

out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}
237 | |||
/*
 * ovl_getxattr - read an extended attribute from the backing object
 *
 * Private "trusted.overlay.*" attributes are hidden (-ENODATA) when the
 * parent is a merged directory; otherwise the request is forwarded to
 * the real dentry.
 */
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
		     void *value, size_t size)
{
	if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
	    ovl_is_private_xattr(name))
		return -ENODATA;

	return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
}
247 | |||
/*
 * ovl_listxattr - list extended attributes, hiding private ones
 *
 * The real fs fills @list with NUL-terminated names back to back.  For
 * children of merged directories, private "trusted.overlay.*" names are
 * compacted out of the buffer in place.  Returns the (possibly reduced)
 * byte count, or a negative errno / 0 passed through from the real fs.
 */
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	ssize_t res;
	int off;

	res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
	if (res <= 0 || size == 0)
		return res;

	if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
		return res;

	/* filter out private xattrs */
	for (off = 0; off < res;) {
		char *s = list + off;
		size_t slen = strlen(s) + 1;

		BUG_ON(off + slen > res);

		if (ovl_is_private_xattr(s)) {
			/* shift the remaining names down over this one */
			res -= slen;
			memmove(s, s + slen, res - off);
		} else {
			off += slen;
		}
	}

	return res;
}
277 | |||
278 | int ovl_removexattr(struct dentry *dentry, const char *name) | ||
279 | { | ||
280 | int err; | ||
281 | struct path realpath; | ||
282 | enum ovl_path_type type; | ||
283 | |||
284 | err = ovl_want_write(dentry); | ||
285 | if (err) | ||
286 | goto out; | ||
287 | |||
288 | if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && | ||
289 | ovl_is_private_xattr(name)) | ||
290 | goto out_drop_write; | ||
291 | |||
292 | type = ovl_path_real(dentry, &realpath); | ||
293 | if (type == OVL_PATH_LOWER) { | ||
294 | err = vfs_getxattr(realpath.dentry, name, NULL, 0); | ||
295 | if (err < 0) | ||
296 | goto out_drop_write; | ||
297 | |||
298 | err = ovl_copy_up(dentry); | ||
299 | if (err) | ||
300 | goto out_drop_write; | ||
301 | |||
302 | ovl_path_upper(dentry, &realpath); | ||
303 | } | ||
304 | |||
305 | err = vfs_removexattr(realpath.dentry, name); | ||
306 | out_drop_write: | ||
307 | ovl_drop_write(dentry); | ||
308 | out: | ||
309 | return err; | ||
310 | } | ||
311 | |||
312 | static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, | ||
313 | struct dentry *realdentry) | ||
314 | { | ||
315 | if (type != OVL_PATH_LOWER) | ||
316 | return false; | ||
317 | |||
318 | if (special_file(realdentry->d_inode->i_mode)) | ||
319 | return false; | ||
320 | |||
321 | if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) | ||
322 | return false; | ||
323 | |||
324 | return true; | ||
325 | } | ||
326 | |||
/*
 * ovl_dentry_open - open the real file backing an overlay dentry
 *
 * A write-intent open of a lower file triggers copy-up first; with
 * O_TRUNC the data copy is skipped (size forced to 0) since it would be
 * discarded anyway.  The upper path is then re-resolved and opened.
 * Returns 0 or a negative errno.
 */
static int ovl_dentry_open(struct dentry *dentry, struct file *file,
		    const struct cred *cred)
{
	int err;
	struct path realpath;
	enum ovl_path_type type;
	bool want_write = false;

	type = ovl_path_real(dentry, &realpath);
	if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
		want_write = true;
		err = ovl_want_write(dentry);
		if (err)
			goto out;

		if (file->f_flags & O_TRUNC)
			/* contents will be discarded: copy metadata only */
			err = ovl_copy_up_last(dentry, NULL, true);
		else
			err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		ovl_path_upper(dentry, &realpath);
	}

	err = vfs_open(&realpath, file, cred);
out_drop_write:
	if (want_write)
		ovl_drop_write(dentry);
out:
	return err;
}
359 | |||
/*
 * Inode operations for overlay regular files and special files.
 * dentry_open redirects opens to the backing file, copying up first
 * when the open implies a write.
 */
static const struct inode_operations ovl_file_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.setxattr	= ovl_setxattr,
	.getxattr	= ovl_getxattr,
	.listxattr	= ovl_listxattr,
	.removexattr	= ovl_removexattr,
	.dentry_open	= ovl_dentry_open,
};
370 | |||
/*
 * Inode operations for overlay symlinks: link traversal and readlink
 * are forwarded to the backing inode of whichever layer holds the link.
 */
static const struct inode_operations ovl_symlink_inode_operations = {
	.setattr	= ovl_setattr,
	.follow_link	= ovl_follow_link,
	.put_link	= ovl_put_link,
	.readlink	= ovl_readlink,
	.getattr	= ovl_getattr,
	.setxattr	= ovl_setxattr,
	.getxattr	= ovl_getxattr,
	.listxattr	= ovl_listxattr,
	.removexattr	= ovl_removexattr,
};
382 | |||
/*
 * ovl_new_inode - allocate and initialize an overlay inode
 * @sb:   overlay superblock
 * @mode: file mode; only the S_IFMT type bits are used here
 * @oe:   overlay entry, stashed in i_private for directories only
 *        (ovl_permission() relies on this for RCU-safe dir checks)
 *
 * Time stamps are managed by the real inodes, hence S_NOATIME and
 * S_NOCMTIME.  Returns the new inode or NULL on allocation failure or
 * an unexpected file type.
 */
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
			    struct ovl_entry *oe)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (!inode)
		return NULL;

	mode &= S_IFMT;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_flags |= S_NOATIME | S_NOCMTIME;

	switch (mode) {
	case S_IFDIR:
		inode->i_private = oe;
		inode->i_op = &ovl_dir_inode_operations;
		inode->i_fop = &ovl_dir_operations;
		break;

	case S_IFLNK:
		inode->i_op = &ovl_symlink_inode_operations;
		break;

	case S_IFREG:
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		inode->i_op = &ovl_file_inode_operations;
		break;

	default:
		WARN(1, "illegal file type: %i\n", mode);
		iput(inode);
		inode = NULL;
	}

	return inode;

}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h new file mode 100644 index 000000000000..814bed33dd07 --- /dev/null +++ b/fs/overlayfs/overlayfs.h | |||
@@ -0,0 +1,191 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | |||
12 | struct ovl_entry; | ||
13 | |||
/*
 * Where an overlay dentry's backing object lives; returned by
 * ovl_path_type()/ovl_path_real().  OVL_PATH_MERGE denotes a directory
 * whose contents are merged from both layers.
 */
enum ovl_path_type {
	OVL_PATH_PURE_UPPER,
	OVL_PATH_UPPER,
	OVL_PATH_MERGE,
	OVL_PATH_LOWER,
};
20 | |||
21 | extern const char *ovl_opaque_xattr; | ||
22 | |||
/* Wrapper around vfs_rmdir() that logs the call and result. */
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int err = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, err);
	return err;
}
29 | |||
/* Wrapper around vfs_unlink() (no delegated inode) that logs the result. */
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
{
	int err = vfs_unlink(dir, dentry, NULL);
	pr_debug("unlink(%pd2) = %i\n", dentry, err);
	return err;
}
36 | |||
/*
 * Wrapper around vfs_link().  @debug suppresses logging for callers
 * (e.g. copy-up internals) that would otherwise flood the log.
 */
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
			      struct dentry *new_dentry, bool debug)
{
	int err = vfs_link(old_dentry, dir, new_dentry, NULL);
	if (debug) {
		pr_debug("link(%pd2, %pd2) = %i\n",
			 old_dentry, new_dentry, err);
	}
	return err;
}
47 | |||
/* Wrapper around vfs_create(); logs when @debug is set. */
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
				umode_t mode, bool debug)
{
	int err = vfs_create(dir, dentry, mode, true);
	if (debug)
		pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
	return err;
}
56 | |||
/* Wrapper around vfs_mkdir(); logs when @debug is set. */
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
			       umode_t mode, bool debug)
{
	int err = vfs_mkdir(dir, dentry, mode);
	if (debug)
		pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
	return err;
}
65 | |||
/* Wrapper around vfs_mknod(); logs when @debug is set. */
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
			       umode_t mode, dev_t dev, bool debug)
{
	int err = vfs_mknod(dir, dentry, mode, dev);
	if (debug) {
		pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
			 dentry, mode, dev, err);
	}
	return err;
}
76 | |||
/* Wrapper around vfs_symlink(); logs when @debug is set. */
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
				 const char *oldname, bool debug)
{
	int err = vfs_symlink(dir, dentry, oldname);
	if (debug)
		pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
	return err;
}
85 | |||
/* Wrapper around vfs_setxattr() that always logs the call and result. */
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	int err = vfs_setxattr(dentry, name, value, size, flags);
	pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
		 dentry, name, (int) size, (char *) value, flags, err);
	return err;
}
94 | |||
/* Wrapper around vfs_removexattr() that logs the call and result. */
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int err = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
	return err;
}
101 | |||
/*
 * Wrapper around vfs_rename() (no delegated inode).  Logs the attempt
 * before the call and logs again only on failure.
 */
static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
				struct inode *newdir, struct dentry *newdentry,
				unsigned int flags)
{
	int err;

	pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
		 olddentry, newdentry, flags);

	err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);

	if (err) {
		pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
			 olddentry, newdentry, err);
	}
	return err;
}
119 | |||
/* Wrapper around vfs_whiteout() that logs the call and result. */
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int err = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, err);
	return err;
}
126 | |||
127 | enum ovl_path_type ovl_path_type(struct dentry *dentry); | ||
128 | u64 ovl_dentry_version_get(struct dentry *dentry); | ||
129 | void ovl_dentry_version_inc(struct dentry *dentry); | ||
130 | void ovl_path_upper(struct dentry *dentry, struct path *path); | ||
131 | void ovl_path_lower(struct dentry *dentry, struct path *path); | ||
132 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); | ||
133 | struct dentry *ovl_dentry_upper(struct dentry *dentry); | ||
134 | struct dentry *ovl_dentry_lower(struct dentry *dentry); | ||
135 | struct dentry *ovl_dentry_real(struct dentry *dentry); | ||
136 | struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); | ||
137 | struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); | ||
138 | void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); | ||
139 | struct dentry *ovl_workdir(struct dentry *dentry); | ||
140 | int ovl_want_write(struct dentry *dentry); | ||
141 | void ovl_drop_write(struct dentry *dentry); | ||
142 | bool ovl_dentry_is_opaque(struct dentry *dentry); | ||
143 | void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); | ||
144 | bool ovl_is_whiteout(struct dentry *dentry); | ||
145 | void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); | ||
146 | struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | ||
147 | unsigned int flags); | ||
148 | struct file *ovl_path_open(struct path *path, int flags); | ||
149 | |||
150 | struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, | ||
151 | struct kstat *stat, const char *link); | ||
152 | |||
153 | /* readdir.c */ | ||
154 | extern const struct file_operations ovl_dir_operations; | ||
155 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); | ||
156 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); | ||
157 | void ovl_cache_free(struct list_head *list); | ||
158 | |||
159 | /* inode.c */ | ||
160 | int ovl_setattr(struct dentry *dentry, struct iattr *attr); | ||
161 | int ovl_permission(struct inode *inode, int mask); | ||
162 | int ovl_setxattr(struct dentry *dentry, const char *name, | ||
163 | const void *value, size_t size, int flags); | ||
164 | ssize_t ovl_getxattr(struct dentry *dentry, const char *name, | ||
165 | void *value, size_t size); | ||
166 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); | ||
167 | int ovl_removexattr(struct dentry *dentry, const char *name); | ||
168 | |||
169 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, | ||
170 | struct ovl_entry *oe); | ||
/* Copy ownership (uid/gid only) from a real inode to an overlay inode. */
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
	to->i_uid = from->i_uid;
	to->i_gid = from->i_gid;
}
176 | |||
177 | /* dir.c */ | ||
178 | extern const struct inode_operations ovl_dir_inode_operations; | ||
179 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); | ||
180 | int ovl_create_real(struct inode *dir, struct dentry *newdentry, | ||
181 | struct kstat *stat, const char *link, | ||
182 | struct dentry *hardlink, bool debug); | ||
183 | void ovl_cleanup(struct inode *dir, struct dentry *dentry); | ||
184 | |||
185 | /* copy_up.c */ | ||
186 | int ovl_copy_up(struct dentry *dentry); | ||
187 | int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | ||
188 | struct path *lowerpath, struct kstat *stat, | ||
189 | struct iattr *attr); | ||
190 | int ovl_copy_xattr(struct dentry *old, struct dentry *new); | ||
191 | int ovl_set_attr(struct dentry *upper, struct kstat *stat); | ||
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c new file mode 100644 index 000000000000..2a7ef4f8e2a6 --- /dev/null +++ b/fs/overlayfs/readdir.c | |||
@@ -0,0 +1,593 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/namei.h> | ||
13 | #include <linux/file.h> | ||
14 | #include <linux/xattr.h> | ||
15 | #include <linux/rbtree.h> | ||
16 | #include <linux/security.h> | ||
17 | #include <linux/cred.h> | ||
18 | #include "overlayfs.h" | ||
19 | |||
/*
 * One merged directory entry.  Entries live simultaneously on the
 * cache's ordered list (l_node) and, while the upper layer is being
 * read, in an rb-tree keyed by name (node) for duplicate detection.
 */
struct ovl_cache_entry {
	unsigned int len;		/* name length, excluding NUL */
	unsigned int type;		/* d_type from the real fs */
	u64 ino;
	struct list_head l_node;	/* position in ovl_dir_cache.entries */
	struct rb_node node;		/* upper-layer dedup tree linkage */
	bool is_whiteout;		/* hidden: upper whiteout covers it */
	bool is_cursor;			/* fake entry: a reader's position */
	char name[];			/* NUL-terminated, flexible array */
};

/* Merged listing of one directory, shared by its concurrent readers. */
struct ovl_dir_cache {
	long refcount;
	u64 version;			/* ovl_dentry_version at build time */
	struct list_head entries;
};

/* Per-iteration state passed to iterate_dir() as the dir_context. */
struct ovl_readdir_data {
	struct dir_context ctx;
	bool is_merge;			/* true while reading the lower layer */
	struct rb_root root;		/* names seen in the upper layer */
	struct list_head *list;		/* destination entry list */
	struct list_head middle;	/* insertion point for lower entries */
	int count;			/* entries seen in current batch */
	int err;
};

/* private_data of an open overlay directory file. */
struct ovl_dir_file {
	bool is_real;			/* non-merged: pass through realfile */
	bool is_upper;
	struct ovl_dir_cache *cache;
	struct ovl_cache_entry cursor;	/* this reader's list position */
	struct file *realfile;
	struct file *upperfile;
};
55 | |||
/* Map an rb-tree node back to its containing cache entry. */
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return container_of(n, struct ovl_cache_entry, node);
}
60 | |||
/*
 * Look up @name (of length @len, not NUL-terminated) in the rb-tree of
 * upper-layer entries.  Returns the entry or NULL.
 *
 * Ordering matches ovl_cache_entry_add_rb(): strncmp over @len bytes,
 * with "len < p->len" breaking the tie when @name is a strict prefix of
 * a stored name (stored names are NUL-terminated, so a longer @name
 * already compares greater via the NUL byte).
 */
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}
81 | |||
82 | static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, | ||
83 | u64 ino, unsigned int d_type) | ||
84 | { | ||
85 | struct ovl_cache_entry *p; | ||
86 | size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); | ||
87 | |||
88 | p = kmalloc(size, GFP_KERNEL); | ||
89 | if (p) { | ||
90 | memcpy(p->name, name, len); | ||
91 | p->name[len] = '\0'; | ||
92 | p->len = len; | ||
93 | p->type = d_type; | ||
94 | p->ino = ino; | ||
95 | p->is_whiteout = false; | ||
96 | p->is_cursor = false; | ||
97 | } | ||
98 | |||
99 | return p; | ||
100 | } | ||
101 | |||
/*
 * Add an upper-layer entry to both the rb-tree (for duplicate
 * suppression) and the tail of the entry list.  A name already present
 * is silently skipped.  Returns 0 or -ENOMEM.
 */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len, u64 ino,
				  unsigned int d_type)
{
	struct rb_node **newp = &rdd->root.rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	/* Standard rb-tree insert walk; comparison mirrors _find() */
	while (*newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		cmp = strncmp(name, tmp->name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->len)
			newp = &tmp->node.rb_left;
		else
			return 0;
	}

	p = ovl_cache_entry_new(name, len, ino, d_type);
	if (p == NULL)
		return -ENOMEM;

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, &rdd->root);

	return 0;
}
135 | |||
/*
 * Handle one lower-layer entry during the merge pass.  If the name was
 * already seen in the upper layer, move that entry to the "middle"
 * list; otherwise create a fresh entry there.  Returns rdd->err (0 or
 * -ENOMEM), which also aborts iterate_dir() when nonzero.
 */
static int ovl_fill_lower(struct ovl_readdir_data *rdd,
			  const char *name, int namelen,
			  loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(&rdd->root, name, namelen);
	if (p) {
		list_move_tail(&p->l_node, &rdd->middle);
	} else {
		p = ovl_cache_entry_new(name, namelen, ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err;
}
155 | |||
/*
 * Free every cache entry on @list and reinitialize it to empty.
 * Safe-iteration variant is required since entries are freed in place.
 */
void ovl_cache_free(struct list_head *list)
{
	struct ovl_cache_entry *p;
	struct ovl_cache_entry *n;

	list_for_each_entry_safe(p, n, list, l_node)
		kfree(p);

	INIT_LIST_HEAD(list);
}
166 | |||
/*
 * Drop one reader's reference on the directory cache.  The reader's
 * cursor is unhooked from the entry list first; when the last reference
 * goes, the cache is detached from the dentry (if still attached) and
 * freed.
 */
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
{
	struct ovl_dir_cache *cache = od->cache;

	list_del_init(&od->cursor.l_node);
	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(dentry) == cache)
			ovl_set_dir_cache(dentry, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}
182 | |||
183 | static int ovl_fill_merge(void *buf, const char *name, int namelen, | ||
184 | loff_t offset, u64 ino, unsigned int d_type) | ||
185 | { | ||
186 | struct ovl_readdir_data *rdd = buf; | ||
187 | |||
188 | rdd->count++; | ||
189 | if (!rdd->is_merge) | ||
190 | return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); | ||
191 | else | ||
192 | return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); | ||
193 | } | ||
194 | |||
/*
 * Read one real directory in full through the ovl_fill_merge actor.
 * iterate_dir() may stop early when a batch fills up, so loop until a
 * pass yields no entries (rdd->count == 0) or an error occurs.
 */
static inline int ovl_dir_read(struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);
	fput(realfile);

	return err;
}
217 | |||
/*
 * Called on rewind (pos == 0): drop a cache that has been invalidated
 * by directory modification (version mismatch), and demote the file
 * from pass-through mode if the directory has since become merged
 * (e.g. after a copy-up created an upper counterpart).
 */
static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct dentry *dentry = file->f_path.dentry;
	enum ovl_path_type type = ovl_path_type(dentry);

	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
		ovl_cache_put(od, dentry);
		od->cache = NULL;
	}
	WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
	if (od->is_real && type == OVL_PATH_MERGE)
		od->is_real = false;
}
233 | |||
/*
 * Walk the upper-layer entries just read and set is_whiteout on those
 * backed by whiteout objects, so ovl_iterate() can hide them.  Only
 * DT_CHR entries are candidates (whiteouts are char devices).  Lookups
 * run with CAP_DAC_OVERRIDE raised since the mounter's creds may not
 * have search permission everywhere.  Returns 0 or -ENOMEM.
 */
static int ovl_dir_mark_whiteouts(struct dentry *dir,
				  struct ovl_readdir_data *rdd)
{
	struct ovl_cache_entry *p;
	struct dentry *dentry;
	const struct cred *old_cred;
	struct cred *override_cred;

	override_cred = prepare_creds();
	if (!override_cred) {
		ovl_cache_free(rdd->list);
		return -ENOMEM;
	}

	/*
	 * CAP_DAC_OVERRIDE for lookup
	 */
	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
	old_cred = override_creds(override_cred);

	mutex_lock(&dir->d_inode->i_mutex);
	list_for_each_entry(p, rdd->list, l_node) {
		if (p->is_cursor)
			continue;

		if (p->type != DT_CHR)
			continue;

		dentry = lookup_one_len(p->name, dir, p->len);
		if (IS_ERR(dentry))
			continue;

		p->is_whiteout = ovl_is_whiteout(dentry);
		dput(dentry);
	}
	mutex_unlock(&dir->d_inode->i_mutex);

	revert_creds(old_cred);
	put_cred(override_cred);

	return 0;
}
276 | |||
/*
 * Build the merged entry list for a directory: read the upper layer
 * first (marking whiteouts when there is also a lower layer), then read
 * the lower layer in merge mode.  Lower-only entries are inserted at
 * "middle" — before the upper-layer entries — to keep list offsets
 * reasonably stable.  Returns 0 or a negative errno.
 */
static inline int ovl_dir_read_merged(struct path *upperpath,
				      struct path *lowerpath,
				      struct list_head *list)
{
	int err;
	struct ovl_readdir_data rdd = {
		.ctx.actor = ovl_fill_merge,
		.list = list,
		.root = RB_ROOT,
		.is_merge = false,
	};

	if (upperpath->dentry) {
		err = ovl_dir_read(upperpath, &rdd);
		if (err)
			goto out;

		if (lowerpath->dentry) {
			err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd);
			if (err)
				goto out;
		}
	}
	if (lowerpath->dentry) {
		/*
		 * Insert lowerpath entries before upperpath ones, this allows
		 * offsets to be reasonably constant
		 */
		list_add(&rdd.middle, rdd.list);
		rdd.is_merge = true;
		err = ovl_dir_read(lowerpath, &rdd);
		list_del(&rdd.middle);
	}
out:
	return err;
}
313 | |||
/*
 * Position this reader's cursor @pos real entries into the cache list.
 * Other readers' cursors are skipped when counting.  If pos is past the
 * end, the loop leaves p at the list head and the cursor lands at the
 * tail, so iteration terminates immediately.
 */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct ovl_cache_entry *p;
	loff_t off = 0;

	list_for_each_entry(p, &od->cache->entries, l_node) {
		if (p->is_cursor)
			continue;
		if (off >= pos)
			break;
		off++;
	}
	/* insert cursor just before entry p */
	list_move_tail(&od->cursor.l_node, &p->l_node);
}
328 | |||
/*
 * Get the merged-directory cache for @dentry, reusing an existing one
 * if the directory's version still matches, otherwise building a fresh
 * one from both layers.  Takes a reference for the caller.  Returns the
 * cache or an ERR_PTR.
 */
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct path lowerpath;
	struct path upperpath;
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(dentry);
	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
		cache->refcount++;
		return cache;
	}
	/* Stale or absent: detach before rebuilding */
	ovl_set_dir_cache(dentry, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);

	ovl_path_lower(dentry, &lowerpath);
	ovl_path_upper(dentry, &upperpath);

	res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	/* Record the version the listing corresponds to */
	cache->version = ovl_dentry_version_get(dentry);
	ovl_set_dir_cache(dentry, cache);

	return cache;
}
365 | |||
/*
 * ->iterate for overlay directories.
 *
 * Non-merged directories are passed straight through to the real
 * underlying directory.  Merged directories are read from the entry
 * cache, with our per-file cursor entry threaded into the cache list
 * to remember the position between calls.
 */
static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;

	if (!ctx->pos)
		ovl_dir_reset(file);

	/* pure upper or pure lower: no merging needed */
	if (od->is_real)
		return iterate_dir(od->realfile, ctx);

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		if (IS_ERR(cache))
			return PTR_ERR(cache);

		od->cache = cache;
		/* re-sync cursor with ctx->pos (e.g. after llseek) */
		ovl_seek_cursor(od, ctx->pos);
	}

	/* emit entries until the end of the list or until dir_emit stops us */
	while (od->cursor.l_node.next != &od->cache->entries) {
		struct ovl_cache_entry *p;

		p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node);
		/* Skip cursors */
		if (!p->is_cursor) {
			/* whiteouts hide lower entries; they consume an
			 * offset slot but are never reported to userspace */
			if (!p->is_whiteout) {
				if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
					break;
			}
			ctx->pos++;
		}
		/* advance our cursor past the entry just processed */
		list_move(&od->cursor.l_node, &p->l_node);
	}
	return 0;
}
404 | |||
/*
 * ->llseek for overlay directories, serialized by the inode mutex.
 *
 * Non-merged directories delegate to the real file and mirror its
 * position.  Merged directories only support SEEK_SET/SEEK_CUR; a
 * position change also repositions the cache cursor (if a cache has
 * already been built) so the next ovl_iterate() resumes correctly.
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	mutex_lock(&file_inode(file)->i_mutex);
	/* seeking through position 0 discards any stale state */
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			/* SEEK_END etc. unsupported on merged dirs */
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	mutex_unlock(&file_inode(file)->i_mutex);

	return res;
}
444 | |||
/*
 * ->fsync for overlay directories.
 *
 * Writes only ever go to the upper layer, so fsync must target the
 * upper directory even if this file was opened before a copy-up.  The
 * upper file is opened lazily and published in od->upperfile using a
 * lockless read paired with the barrier before taking the inode mutex;
 * losers of the open race drop their extra file reference.
 */
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *realfile = od->realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) {
		struct inode *inode = file_inode(file);

		/* racy read of the published upper file, if any */
		realfile =lockless_dereference(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_path_open(&upperpath, O_RDONLY);
			/* NOTE(review): barrier presumably pairs with the
			 * lockless_dereference above -- confirm ordering */
			smp_mb__before_spinlock();
			mutex_lock(&inode->i_mutex);
			if (!od->upperfile) {
				if (IS_ERR(realfile)) {
					mutex_unlock(&inode->i_mutex);
					return PTR_ERR(realfile);
				}
				od->upperfile = realfile;
			} else {
				/* somebody has beaten us to it */
				if (!IS_ERR(realfile))
					fput(realfile);
				realfile = od->upperfile;
			}
			mutex_unlock(&inode->i_mutex);
		}
	}

	return vfs_fsync_range(realfile, start, end, datasync);
}
484 | |||
485 | static int ovl_dir_release(struct inode *inode, struct file *file) | ||
486 | { | ||
487 | struct ovl_dir_file *od = file->private_data; | ||
488 | |||
489 | if (od->cache) { | ||
490 | mutex_lock(&inode->i_mutex); | ||
491 | ovl_cache_put(od, file->f_path.dentry); | ||
492 | mutex_unlock(&inode->i_mutex); | ||
493 | } | ||
494 | fput(od->realfile); | ||
495 | if (od->upperfile) | ||
496 | fput(od->upperfile); | ||
497 | kfree(od); | ||
498 | |||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | static int ovl_dir_open(struct inode *inode, struct file *file) | ||
503 | { | ||
504 | struct path realpath; | ||
505 | struct file *realfile; | ||
506 | struct ovl_dir_file *od; | ||
507 | enum ovl_path_type type; | ||
508 | |||
509 | od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); | ||
510 | if (!od) | ||
511 | return -ENOMEM; | ||
512 | |||
513 | type = ovl_path_real(file->f_path.dentry, &realpath); | ||
514 | realfile = ovl_path_open(&realpath, file->f_flags); | ||
515 | if (IS_ERR(realfile)) { | ||
516 | kfree(od); | ||
517 | return PTR_ERR(realfile); | ||
518 | } | ||
519 | INIT_LIST_HEAD(&od->cursor.l_node); | ||
520 | od->realfile = realfile; | ||
521 | od->is_real = (type != OVL_PATH_MERGE); | ||
522 | od->is_upper = (type != OVL_PATH_LOWER); | ||
523 | od->cursor.is_cursor = true; | ||
524 | file->private_data = od; | ||
525 | |||
526 | return 0; | ||
527 | } | ||
528 | |||
/* File operations for overlay directories (merged and plain). */
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate	= ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
};
537 | |||
538 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) | ||
539 | { | ||
540 | int err; | ||
541 | struct path lowerpath; | ||
542 | struct path upperpath; | ||
543 | struct ovl_cache_entry *p; | ||
544 | |||
545 | ovl_path_upper(dentry, &upperpath); | ||
546 | ovl_path_lower(dentry, &lowerpath); | ||
547 | |||
548 | err = ovl_dir_read_merged(&upperpath, &lowerpath, list); | ||
549 | if (err) | ||
550 | return err; | ||
551 | |||
552 | err = 0; | ||
553 | |||
554 | list_for_each_entry(p, list, l_node) { | ||
555 | if (p->is_whiteout) | ||
556 | continue; | ||
557 | |||
558 | if (p->name[0] == '.') { | ||
559 | if (p->len == 1) | ||
560 | continue; | ||
561 | if (p->len == 2 && p->name[1] == '.') | ||
562 | continue; | ||
563 | } | ||
564 | err = -ENOTEMPTY; | ||
565 | break; | ||
566 | } | ||
567 | |||
568 | return err; | ||
569 | } | ||
570 | |||
/*
 * Remove all whiteout entries named in @list from the upper directory
 * @upper.  Used when a directory's contents have been disposed of and
 * its whiteouts are no longer needed.  Lookup failures are logged and
 * skipped rather than aborting the whole pass.
 */
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
{
	struct ovl_cache_entry *p;

	/* child-class lock: caller presumably holds the parent's lock */
	mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (!p->is_whiteout)
			continue;

		dentry = lookup_one_len(p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		ovl_cleanup(upper->d_inode, dentry);
		dput(dentry);
	}
	mutex_unlock(&upper->d_inode->i_mutex);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c new file mode 100644 index 000000000000..08b704cebfc4 --- /dev/null +++ b/fs/overlayfs/super.c | |||
@@ -0,0 +1,796 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (C) 2011 Novell Inc. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published by | ||
7 | * the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/fs.h> | ||
11 | #include <linux/namei.h> | ||
12 | #include <linux/xattr.h> | ||
13 | #include <linux/security.h> | ||
14 | #include <linux/mount.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/parser.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/statfs.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include "overlayfs.h" | ||
22 | |||
23 | MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); | ||
24 | MODULE_DESCRIPTION("Overlay filesystem"); | ||
25 | MODULE_LICENSE("GPL"); | ||
26 | |||
/* Overlayfs filesystem magic, reported by statfs */
#define OVERLAYFS_SUPER_MAGIC 0x794c764f

/* Mount options as parsed from the option string */
struct ovl_config {
	char *lowerdir;
	char *upperdir;
	char *workdir;
};

/* private information held for overlayfs's superblock */
struct ovl_fs {
	struct vfsmount *upper_mnt;	/* private clone of the upper mount */
	struct vfsmount *lower_mnt;	/* private clone of the lower mount */
	struct dentry *workdir;		/* "work" dir created under workdir= */
	long lower_namelen;		/* lower fs f_namelen, for statfs */
	/* pathnames of lower and upper dirs, for show_options */
	struct ovl_config config;
};

struct ovl_dir_cache;

/* private information held for every overlayfs dentry */
struct ovl_entry {
	/* read via ovl_upperdentry_dereference(); set once by
	 * ovl_dentry_update() after copy-up */
	struct dentry *__upperdentry;
	struct dentry *lowerdentry;
	struct ovl_dir_cache *cache;	/* readdir cache for merged dirs */
	union {
		struct {
			u64 version;	/* bumped on dir modification */
			bool opaque;	/* upper hides any lower entry */
		};
		struct rcu_head rcu;	/* for kfree_rcu() in d_release */
	};
};

/* xattr marking an upper directory as hiding the lower directory */
const char *ovl_opaque_xattr = "trusted.overlay.opaque";
62 | |||
63 | |||
64 | enum ovl_path_type ovl_path_type(struct dentry *dentry) | ||
65 | { | ||
66 | struct ovl_entry *oe = dentry->d_fsdata; | ||
67 | |||
68 | if (oe->__upperdentry) { | ||
69 | if (oe->lowerdentry) { | ||
70 | if (S_ISDIR(dentry->d_inode->i_mode)) | ||
71 | return OVL_PATH_MERGE; | ||
72 | else | ||
73 | return OVL_PATH_UPPER; | ||
74 | } else { | ||
75 | if (oe->opaque) | ||
76 | return OVL_PATH_UPPER; | ||
77 | else | ||
78 | return OVL_PATH_PURE_UPPER; | ||
79 | } | ||
80 | } else { | ||
81 | return OVL_PATH_LOWER; | ||
82 | } | ||
83 | } | ||
84 | |||
/*
 * Lockless read of oe->__upperdentry, which may be set concurrently by
 * ovl_dentry_update() during copy-up (NULL -> valid, set once).
 */
static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
{
	struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
	/*
	 * Make sure to order reads to upperdentry wrt ovl_dentry_update()
	 */
	smp_read_barrier_depends();
	return upperdentry;
}

/* Fill @path with the upper layer of @dentry (dentry NULL if none). */
void ovl_path_upper(struct dentry *dentry, struct path *path)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct ovl_entry *oe = dentry->d_fsdata;

	path->mnt = ofs->upper_mnt;
	path->dentry = ovl_upperdentry_dereference(oe);
}
103 | |||
104 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) | ||
105 | { | ||
106 | |||
107 | enum ovl_path_type type = ovl_path_type(dentry); | ||
108 | |||
109 | if (type == OVL_PATH_LOWER) | ||
110 | ovl_path_lower(dentry, path); | ||
111 | else | ||
112 | ovl_path_upper(dentry, path); | ||
113 | |||
114 | return type; | ||
115 | } | ||
116 | |||
/* Upper-layer dentry of @dentry, or NULL if there is none. */
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	return ovl_upperdentry_dereference(oe);
}

/* Lower-layer dentry of @dentry, or NULL if there is none. */
struct dentry *ovl_dentry_lower(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	return oe->lowerdentry;
}

/* Backing dentry of @dentry: upper if present, else lower. */
struct dentry *ovl_dentry_real(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	struct dentry *realdentry;

	realdentry = ovl_upperdentry_dereference(oe);
	if (!realdentry)
		realdentry = oe->lowerdentry;

	return realdentry;
}

/* Backing dentry of @oe; *is_upper reports which layer it came from. */
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
{
	struct dentry *realdentry;

	realdentry = ovl_upperdentry_dereference(oe);
	if (realdentry) {
		*is_upper = true;
	} else {
		realdentry = oe->lowerdentry;
		*is_upper = false;
	}
	return realdentry;
}

/* Readdir cache attached to @dentry (may be NULL). */
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	return oe->cache;
}

/* Attach (or clear, with NULL) the readdir cache of @dentry. */
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	oe->cache = cache;
}

/* Fill @path with the lower layer of @dentry (dentry NULL if none). */
void ovl_path_lower(struct dentry *dentry, struct path *path)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct ovl_entry *oe = dentry->d_fsdata;

	path->mnt = ofs->lower_mnt;
	path->dentry = oe->lowerdentry;
}
179 | |||
/* Get write access to the upper mount; pair with ovl_drop_write(). */
int ovl_want_write(struct dentry *dentry)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	return mnt_want_write(ofs->upper_mnt);
}

/* Release write access taken by ovl_want_write(). */
void ovl_drop_write(struct dentry *dentry)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	mnt_drop_write(ofs->upper_mnt);
}

/* The per-superblock work directory used for copy-up etc. */
struct dentry *ovl_workdir(struct dentry *dentry)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	return ofs->workdir;
}

/* Whether @dentry's upper layer hides any lower entry. */
bool ovl_dentry_is_opaque(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	return oe->opaque;
}

/* Record whether @dentry's upper layer hides any lower entry. */
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	oe->opaque = opaque;
}
209 | |||
/*
 * Publish the upper dentry created by copy-up.  Called with the upper
 * parent's i_mutex held; the write barrier pairs with the read side in
 * ovl_upperdentry_dereference() so lockless readers never see a
 * half-initialized dentry.
 */
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
	WARN_ON(oe->__upperdentry);
	BUG_ON(!upperdentry->d_inode);
	/*
	 * Make sure upperdentry is consistent before making it visible to
	 * ovl_upperdentry_dereference().
	 */
	smp_wmb();
	oe->__upperdentry = upperdentry;
}

/* Bump the directory version, invalidating its readdir cache. */
void ovl_dentry_version_inc(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	oe->version++;
}

/* Current directory version; compared against the readdir cache. */
u64 ovl_dentry_version_get(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	return oe->version;
}
240 | |||
241 | bool ovl_is_whiteout(struct dentry *dentry) | ||
242 | { | ||
243 | struct inode *inode = dentry->d_inode; | ||
244 | |||
245 | return inode && IS_WHITEOUT(inode); | ||
246 | } | ||
247 | |||
248 | static bool ovl_is_opaquedir(struct dentry *dentry) | ||
249 | { | ||
250 | int res; | ||
251 | char val; | ||
252 | struct inode *inode = dentry->d_inode; | ||
253 | |||
254 | if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) | ||
255 | return false; | ||
256 | |||
257 | res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); | ||
258 | if (res == 1 && val == 'y') | ||
259 | return true; | ||
260 | |||
261 | return false; | ||
262 | } | ||
263 | |||
/*
 * ->d_release: drop our references on the layer dentries and free the
 * entry.  kfree_rcu() is used because lockless readers (RCU-walk) may
 * still hold a pointer to the ovl_entry.
 */
static void ovl_dentry_release(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	if (oe) {
		dput(oe->__upperdentry);
		dput(oe->lowerdentry);
		kfree_rcu(oe, rcu);
	}
}

static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
};

/* Allocate a zeroed ovl_entry (NULL layers, version 0, not opaque). */
static struct ovl_entry *ovl_alloc_entry(void)
{
	return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
}
283 | |||
284 | static inline struct dentry *ovl_lookup_real(struct dentry *dir, | ||
285 | struct qstr *name) | ||
286 | { | ||
287 | struct dentry *dentry; | ||
288 | |||
289 | mutex_lock(&dir->d_inode->i_mutex); | ||
290 | dentry = lookup_one_len(name->name, dir, name->len); | ||
291 | mutex_unlock(&dir->d_inode->i_mutex); | ||
292 | |||
293 | if (IS_ERR(dentry)) { | ||
294 | if (PTR_ERR(dentry) == -ENOENT) | ||
295 | dentry = NULL; | ||
296 | } else if (!dentry->d_inode) { | ||
297 | dput(dentry); | ||
298 | dentry = NULL; | ||
299 | } | ||
300 | return dentry; | ||
301 | } | ||
302 | |||
/*
 * ->lookup: find @dentry in the upper and/or lower layer of its parent
 * and build the overlay entry for it.
 *
 * A whiteout or opaque upper directory stops the lower lookup.  A
 * type mismatch between layers (only dir-over-dir merges) discards
 * the lower dentry.  Returns NULL on success (dentry instantiated,
 * possibly negative) or an ERR_PTR on failure.
 */
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
{
	struct ovl_entry *oe;
	struct dentry *upperdir;
	struct dentry *lowerdir;
	struct dentry *upperdentry = NULL;
	struct dentry *lowerdentry = NULL;
	struct inode *inode = NULL;
	int err;

	err = -ENOMEM;
	oe = ovl_alloc_entry();
	if (!oe)
		goto out;

	upperdir = ovl_dentry_upper(dentry->d_parent);
	lowerdir = ovl_dentry_lower(dentry->d_parent);

	if (upperdir) {
		upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
		err = PTR_ERR(upperdentry);
		if (IS_ERR(upperdentry))
			goto out_put_dir;

		/* whiteout/opaque only matters if a lower layer exists */
		if (lowerdir && upperdentry) {
			if (ovl_is_whiteout(upperdentry)) {
				dput(upperdentry);
				upperdentry = NULL;
				oe->opaque = true;
			} else if (ovl_is_opaquedir(upperdentry)) {
				oe->opaque = true;
			}
		}
	}
	/* opaque upper hides the lower layer completely */
	if (lowerdir && !oe->opaque) {
		lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
		err = PTR_ERR(lowerdentry);
		if (IS_ERR(lowerdentry))
			goto out_dput_upper;
	}

	/* only directories merge; for anything else the upper wins */
	if (lowerdentry && upperdentry &&
	    (!S_ISDIR(upperdentry->d_inode->i_mode) ||
	     !S_ISDIR(lowerdentry->d_inode->i_mode))) {
		dput(lowerdentry);
		lowerdentry = NULL;
		oe->opaque = true;
	}

	if (lowerdentry || upperdentry) {
		struct dentry *realdentry;

		realdentry = upperdentry ? upperdentry : lowerdentry;
		err = -ENOMEM;
		inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
				      oe);
		if (!inode)
			goto out_dput;
		ovl_copyattr(realdentry->d_inode, inode);
	}

	oe->__upperdentry = upperdentry;
	oe->lowerdentry = lowerdentry;

	dentry->d_fsdata = oe;
	d_add(dentry, inode);

	return NULL;

out_dput:
	dput(lowerdentry);
out_dput_upper:
	dput(upperdentry);
out_put_dir:
	kfree(oe);
out:
	return ERR_PTR(err);
}
382 | |||
/* Open the real file at @path with the current task's credentials. */
struct file *ovl_path_open(struct path *path, int flags)
{
	return dentry_open(path, flags, current_cred());
}

/*
 * ->put_super: release everything ovl_fill_super() acquired -- the
 * workdir dentry, both private mount clones and the option strings.
 */
static void ovl_put_super(struct super_block *sb)
{
	struct ovl_fs *ufs = sb->s_fs_info;

	dput(ufs->workdir);
	mntput(ufs->upper_mnt);
	mntput(ufs->lower_mnt);

	kfree(ufs->config.lowerdir);
	kfree(ufs->config.upperdir);
	kfree(ufs->config.workdir);
	kfree(ufs);
}
401 | |||
/**
 * ovl_statfs
 * @dentry: A dentry of the overlay filesystem (the root is used)
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper
 * layer filesystem, pass the statfs to that filesystem, but report at
 * least the lower layer's name length so lookups of long lower names
 * are not rejected by userspace.
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

	ovl_path_upper(root_dentry, &path);

	err = vfs_statfs(&path, buf);
	if (!err) {
		buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}
427 | |||
/**
 * ovl_show_options
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 *
 * NOTE(review): the stored pathnames are printed verbatim; a comma or
 * whitespace in a directory name would corrupt the /proc/mounts line.
 * Consider escaping (seq_escape or equivalent) -- verify against the
 * kernel's current convention for option quoting.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
	struct ovl_fs *ufs = sb->s_fs_info;

	seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
	seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
	seq_printf(m, ",workdir=%s", ufs->config.workdir);
	return 0;
}

static const struct super_operations ovl_super_operations = {
	.put_super	= ovl_put_super,
	.statfs		= ovl_statfs,
	.show_options	= ovl_show_options,
};
450 | |||
/* Tokens for the three recognized mount options. */
enum {
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	OPT_ERR,
};

/* match_token() table; OPT_ERR/NULL terminates and catches unknowns. */
static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
	{OPT_ERR,			NULL}
};
464 | |||
465 | static int ovl_parse_opt(char *opt, struct ovl_config *config) | ||
466 | { | ||
467 | char *p; | ||
468 | |||
469 | while ((p = strsep(&opt, ",")) != NULL) { | ||
470 | int token; | ||
471 | substring_t args[MAX_OPT_ARGS]; | ||
472 | |||
473 | if (!*p) | ||
474 | continue; | ||
475 | |||
476 | token = match_token(p, ovl_tokens, args); | ||
477 | switch (token) { | ||
478 | case OPT_UPPERDIR: | ||
479 | kfree(config->upperdir); | ||
480 | config->upperdir = match_strdup(&args[0]); | ||
481 | if (!config->upperdir) | ||
482 | return -ENOMEM; | ||
483 | break; | ||
484 | |||
485 | case OPT_LOWERDIR: | ||
486 | kfree(config->lowerdir); | ||
487 | config->lowerdir = match_strdup(&args[0]); | ||
488 | if (!config->lowerdir) | ||
489 | return -ENOMEM; | ||
490 | break; | ||
491 | |||
492 | case OPT_WORKDIR: | ||
493 | kfree(config->workdir); | ||
494 | config->workdir = match_strdup(&args[0]); | ||
495 | if (!config->workdir) | ||
496 | return -ENOMEM; | ||
497 | break; | ||
498 | |||
499 | default: | ||
500 | return -EINVAL; | ||
501 | } | ||
502 | } | ||
503 | return 0; | ||
504 | } | ||
505 | |||
/* Name of the scratch directory created inside workdir= */
#define OVL_WORKDIR_NAME "work"

/*
 * Create (or recreate) the "work" directory under @dentry on mount
 * @mnt.  A leftover from a previous mount is removed and the creation
 * retried once; a second collision fails with -EEXIST.  Returns the
 * referenced workdir dentry or an ERR_PTR.
 */
static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
					 struct dentry *dentry)
{
	struct inode *dir = dentry->d_inode;
	struct dentry *work;
	int err;
	bool retried = false;

	err = mnt_want_write(mnt);
	if (err)
		return ERR_PTR(err);

	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
retry:
	work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
			      strlen(OVL_WORKDIR_NAME));

	if (!IS_ERR(work)) {
		/* mode 0: nobody but root should ever look inside */
		struct kstat stat = {
			.mode = S_IFDIR | 0,
		};

		if (work->d_inode) {
			/* stale workdir from an earlier mount: wipe it */
			err = -EEXIST;
			if (retried)
				goto out_dput;

			retried = true;
			ovl_cleanup(dir, work);
			dput(work);
			goto retry;
		}

		err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
		if (err)
			goto out_dput;
	}
out_unlock:
	mutex_unlock(&dir->i_mutex);
	mnt_drop_write(mnt);

	return work;

out_dput:
	dput(work);
	work = ERR_PTR(err);
	goto out_unlock;
}
556 | |||
557 | static int ovl_mount_dir(const char *name, struct path *path) | ||
558 | { | ||
559 | int err; | ||
560 | |||
561 | err = kern_path(name, LOOKUP_FOLLOW, path); | ||
562 | if (err) { | ||
563 | pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); | ||
564 | err = -EINVAL; | ||
565 | } | ||
566 | return err; | ||
567 | } | ||
568 | |||
569 | static bool ovl_is_allowed_fs_type(struct dentry *root) | ||
570 | { | ||
571 | const struct dentry_operations *dop = root->d_op; | ||
572 | |||
573 | /* | ||
574 | * We don't support: | ||
575 | * - automount filesystems | ||
576 | * - filesystems with revalidate (FIXME for lower layer) | ||
577 | * - filesystems with case insensitive names | ||
578 | */ | ||
579 | if (dop && | ||
580 | (dop->d_manage || dop->d_automount || | ||
581 | dop->d_revalidate || dop->d_weak_revalidate || | ||
582 | dop->d_compare || dop->d_hash)) { | ||
583 | return false; | ||
584 | } | ||
585 | return true; | ||
586 | } | ||
587 | |||
588 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
589 | static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) | ||
590 | { | ||
591 | bool ok = false; | ||
592 | |||
593 | if (workdir != upperdir) { | ||
594 | ok = (lock_rename(workdir, upperdir) == NULL); | ||
595 | unlock_rename(workdir, upperdir); | ||
596 | } | ||
597 | return ok; | ||
598 | } | ||
599 | |||
/*
 * Fill the overlay superblock: parse options, resolve and validate the
 * three directories, clone private mounts of upper and lower, create
 * the workdir and build the root dentry.  Errors unwind through the
 * label ladder in strict reverse-acquisition order.
 */
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
	struct path lowerpath;
	struct path upperpath;
	struct path workpath;
	struct inode *root_inode;
	struct dentry *root_dentry;
	struct ovl_entry *oe;
	struct ovl_fs *ufs;
	struct kstatfs statfs;
	int err;

	err = -ENOMEM;
	ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
	if (!ufs)
		goto out;

	err = ovl_parse_opt((char *) data, &ufs->config);
	if (err)
		goto out_free_config;

	/* FIXME: workdir is not needed for a R/O mount */
	err = -EINVAL;
	if (!ufs->config.upperdir || !ufs->config.lowerdir ||
	    !ufs->config.workdir) {
		pr_err("overlayfs: missing upperdir or lowerdir or workdir\n");
		goto out_free_config;
	}

	err = -ENOMEM;
	oe = ovl_alloc_entry();
	if (oe == NULL)
		goto out_free_config;

	err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
	if (err)
		goto out_free_oe;

	err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath);
	if (err)
		goto out_put_upperpath;

	err = ovl_mount_dir(ufs->config.workdir, &workpath);
	if (err)
		goto out_put_lowerpath;

	err = -EINVAL;
	if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
	    !S_ISDIR(lowerpath.dentry->d_inode->i_mode) ||
	    !S_ISDIR(workpath.dentry->d_inode->i_mode)) {
		pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n");
		goto out_put_workpath;
	}

	/* renames between workdir and upperdir require the same mount */
	if (upperpath.mnt != workpath.mnt) {
		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
		goto out_put_workpath;
	}
	if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
		goto out_put_workpath;
	}

	if (!ovl_is_allowed_fs_type(upperpath.dentry)) {
		pr_err("overlayfs: filesystem of upperdir is not supported\n");
		goto out_put_workpath;
	}

	if (!ovl_is_allowed_fs_type(lowerpath.dentry)) {
		pr_err("overlayfs: filesystem of lowerdir is not supported\n");
		goto out_put_workpath;
	}

	/* remember the lower name limit for ovl_statfs() */
	err = vfs_statfs(&lowerpath, &statfs);
	if (err) {
		pr_err("overlayfs: statfs failed on lowerpath\n");
		goto out_put_workpath;
	}
	ufs->lower_namelen = statfs.f_namelen;

	sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
				lowerpath.mnt->mnt_sb->s_stack_depth) + 1;

	err = -EINVAL;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
		goto out_put_workpath;
	}

	/* private clones: immune to later changes of the source mounts */
	ufs->upper_mnt = clone_private_mount(&upperpath);
	err = PTR_ERR(ufs->upper_mnt);
	if (IS_ERR(ufs->upper_mnt)) {
		pr_err("overlayfs: failed to clone upperpath\n");
		goto out_put_workpath;
	}

	ufs->lower_mnt = clone_private_mount(&lowerpath);
	err = PTR_ERR(ufs->lower_mnt);
	if (IS_ERR(ufs->lower_mnt)) {
		pr_err("overlayfs: failed to clone lowerpath\n");
		goto out_put_upper_mnt;
	}

	ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
	err = PTR_ERR(ufs->workdir);
	if (IS_ERR(ufs->workdir)) {
		pr_err("overlayfs: failed to create directory %s/%s\n",
		       ufs->config.workdir, OVL_WORKDIR_NAME);
		goto out_put_lower_mnt;
	}

	/*
	 * Make lower_mnt R/O.  That way fchmod/fchown on lower file
	 * will fail instead of modifying lower fs.
	 */
	ufs->lower_mnt->mnt_flags |= MNT_READONLY;

	/* If the upper fs is r/o, we mark overlayfs r/o too */
	if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
		sb->s_flags |= MS_RDONLY;

	sb->s_d_op = &ovl_dentry_operations;

	err = -ENOMEM;
	root_inode = ovl_new_inode(sb, S_IFDIR, oe);
	if (!root_inode)
		goto out_put_workdir;

	/* d_make_root() consumes root_inode even on failure */
	root_dentry = d_make_root(root_inode);
	if (!root_dentry)
		goto out_put_workdir;

	/* paths no longer needed: the clones hold their own references */
	mntput(upperpath.mnt);
	mntput(lowerpath.mnt);
	path_put(&workpath);

	/* the dentry references of upperpath/lowerpath move into oe */
	oe->__upperdentry = upperpath.dentry;
	oe->lowerdentry = lowerpath.dentry;

	root_dentry->d_fsdata = oe;

	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
	sb->s_op = &ovl_super_operations;
	sb->s_root = root_dentry;
	sb->s_fs_info = ufs;

	return 0;

out_put_workdir:
	dput(ufs->workdir);
out_put_lower_mnt:
	mntput(ufs->lower_mnt);
out_put_upper_mnt:
	mntput(ufs->upper_mnt);
out_put_workpath:
	path_put(&workpath);
out_put_lowerpath:
	path_put(&lowerpath);
out_put_upperpath:
	path_put(&upperpath);
out_free_oe:
	kfree(oe);
out_free_config:
	kfree(ufs->config.lowerdir);
	kfree(ufs->config.upperdir);
	kfree(ufs->config.workdir);
	kfree(ufs);
out:
	return err;
}
770 | |||
/* ->mount: overlayfs has no backing block device. */
static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *raw_data)
{
	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
}

static struct file_system_type ovl_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "overlayfs",
	.mount		= ovl_mount,
	.kill_sb	= kill_anon_super,
};
MODULE_ALIAS_FS("overlayfs");

/* Module init: just register the filesystem type. */
static int __init ovl_init(void)
{
	return register_filesystem(&ovl_fs_type);
}

/* Module exit: unregister; no other global state to tear down. */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);
}

module_init(ovl_init);
module_exit(ovl_exit);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8b663b2d9562..6b4527216a7f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -634,7 +634,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type) | |||
634 | dqstats_inc(DQST_LOOKUPS); | 634 | dqstats_inc(DQST_LOOKUPS); |
635 | err = sb->dq_op->write_dquot(dquot); | 635 | err = sb->dq_op->write_dquot(dquot); |
636 | if (!ret && err) | 636 | if (!ret && err) |
637 | err = ret; | 637 | ret = err; |
638 | dqput(dquot); | 638 | dqput(dquot); |
639 | spin_lock(&dq_list_lock); | 639 | spin_lock(&dq_list_lock); |
640 | } | 640 | } |
diff --git a/fs/splice.c b/fs/splice.c index f5cb9ba84510..75c6058eabf2 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1330,6 +1330,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1330 | 1330 | ||
1331 | return ret; | 1331 | return ret; |
1332 | } | 1332 | } |
1333 | EXPORT_SYMBOL(do_splice_direct); | ||
1333 | 1334 | ||
1334 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | 1335 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, |
1335 | struct pipe_inode_info *opipe, | 1336 | struct pipe_inode_info *opipe, |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 92e8f99a5857..281002689d64 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1338,7 +1338,10 @@ xfs_free_file_space( | |||
1338 | goto out; | 1338 | goto out; |
1339 | } | 1339 | } |
1340 | 1340 | ||
1341 | 1341 | /* | |
1342 | * Preallocate and zero a range of a file. This mechanism has the allocation | ||
1343 | * semantics of fallocate and in addition converts data in the range to zeroes. | ||
1344 | */ | ||
1342 | int | 1345 | int |
1343 | xfs_zero_file_space( | 1346 | xfs_zero_file_space( |
1344 | struct xfs_inode *ip, | 1347 | struct xfs_inode *ip, |
@@ -1346,65 +1349,30 @@ xfs_zero_file_space( | |||
1346 | xfs_off_t len) | 1349 | xfs_off_t len) |
1347 | { | 1350 | { |
1348 | struct xfs_mount *mp = ip->i_mount; | 1351 | struct xfs_mount *mp = ip->i_mount; |
1349 | uint granularity; | 1352 | uint blksize; |
1350 | xfs_off_t start_boundary; | ||
1351 | xfs_off_t end_boundary; | ||
1352 | int error; | 1353 | int error; |
1353 | 1354 | ||
1354 | trace_xfs_zero_file_space(ip); | 1355 | trace_xfs_zero_file_space(ip); |
1355 | 1356 | ||
1356 | granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); | 1357 | blksize = 1 << mp->m_sb.sb_blocklog; |
1357 | 1358 | ||
1358 | /* | 1359 | /* |
1359 | * Round the range of extents we are going to convert inwards. If the | 1360 | * Punch a hole and prealloc the range. We use hole punch rather than |
1360 | * offset is aligned, then it doesn't get changed so we zero from the | 1361 | * unwritten extent conversion for two reasons: |
1361 | * start of the block offset points to. | 1362 | * |
1363 | * 1.) Hole punch handles partial block zeroing for us. | ||
1364 | * | ||
1365 | * 2.) If prealloc returns ENOSPC, the file range is still zero-valued | ||
1366 | * by virtue of the hole punch. | ||
1362 | */ | 1367 | */ |
1363 | start_boundary = round_up(offset, granularity); | 1368 | error = xfs_free_file_space(ip, offset, len); |
1364 | end_boundary = round_down(offset + len, granularity); | 1369 | if (error) |
1365 | 1370 | goto out; | |
1366 | ASSERT(start_boundary >= offset); | ||
1367 | ASSERT(end_boundary <= offset + len); | ||
1368 | |||
1369 | if (start_boundary < end_boundary - 1) { | ||
1370 | /* | ||
1371 | * Writeback the range to ensure any inode size updates due to | ||
1372 | * appending writes make it to disk (otherwise we could just | ||
1373 | * punch out the delalloc blocks). | ||
1374 | */ | ||
1375 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | ||
1376 | start_boundary, end_boundary - 1); | ||
1377 | if (error) | ||
1378 | goto out; | ||
1379 | truncate_pagecache_range(VFS_I(ip), start_boundary, | ||
1380 | end_boundary - 1); | ||
1381 | |||
1382 | /* convert the blocks */ | ||
1383 | error = xfs_alloc_file_space(ip, start_boundary, | ||
1384 | end_boundary - start_boundary - 1, | ||
1385 | XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT); | ||
1386 | if (error) | ||
1387 | goto out; | ||
1388 | |||
1389 | /* We've handled the interior of the range, now for the edges */ | ||
1390 | if (start_boundary != offset) { | ||
1391 | error = xfs_iozero(ip, offset, start_boundary - offset); | ||
1392 | if (error) | ||
1393 | goto out; | ||
1394 | } | ||
1395 | |||
1396 | if (end_boundary != offset + len) | ||
1397 | error = xfs_iozero(ip, end_boundary, | ||
1398 | offset + len - end_boundary); | ||
1399 | |||
1400 | } else { | ||
1401 | /* | ||
1402 | * It's either a sub-granularity range or the range spanned lies | ||
1403 | * partially across two adjacent blocks. | ||
1404 | */ | ||
1405 | error = xfs_iozero(ip, offset, len); | ||
1406 | } | ||
1407 | 1371 | ||
1372 | error = xfs_alloc_file_space(ip, round_down(offset, blksize), | ||
1373 | round_up(offset + len, blksize) - | ||
1374 | round_down(offset, blksize), | ||
1375 | XFS_BMAPI_PREALLOC); | ||
1408 | out: | 1376 | out: |
1409 | return error; | 1377 | return error; |
1410 | 1378 | ||
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f1deb961a296..894924a5129b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk( | |||
236 | XFS_WANT_CORRUPTED_RETURN(stat == 1); | 236 | XFS_WANT_CORRUPTED_RETURN(stat == 1); |
237 | 237 | ||
238 | /* Check if the record contains the inode in request */ | 238 | /* Check if the record contains the inode in request */ |
239 | if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) | 239 | if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { |
240 | return -EINVAL; | 240 | *icount = 0; |
241 | return 0; | ||
242 | } | ||
241 | 243 | ||
242 | idx = agino - irec->ir_startino + 1; | 244 | idx = agino - irec->ir_startino + 1; |
243 | if (idx < XFS_INODES_PER_CHUNK && | 245 | if (idx < XFS_INODES_PER_CHUNK && |
@@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk( | |||
262 | 264 | ||
263 | #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) | 265 | #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) |
264 | 266 | ||
267 | struct xfs_bulkstat_agichunk { | ||
268 | char __user **ac_ubuffer;/* pointer into user's buffer */ | ||
269 | int ac_ubleft; /* bytes left in user's buffer */ | ||
270 | int ac_ubelem; /* spaces used in user's buffer */ | ||
271 | }; | ||
272 | |||
265 | /* | 273 | /* |
266 | * Process inodes in chunk with a pointer to a formatter function | 274 | * Process inodes in chunk with a pointer to a formatter function |
267 | * that will iget the inode and fill in the appropriate structure. | 275 | * that will iget the inode and fill in the appropriate structure. |
268 | */ | 276 | */ |
269 | int | 277 | static int |
270 | xfs_bulkstat_ag_ichunk( | 278 | xfs_bulkstat_ag_ichunk( |
271 | struct xfs_mount *mp, | 279 | struct xfs_mount *mp, |
272 | xfs_agnumber_t agno, | 280 | xfs_agnumber_t agno, |
273 | struct xfs_inobt_rec_incore *irbp, | 281 | struct xfs_inobt_rec_incore *irbp, |
274 | bulkstat_one_pf formatter, | 282 | bulkstat_one_pf formatter, |
275 | size_t statstruct_size, | 283 | size_t statstruct_size, |
276 | struct xfs_bulkstat_agichunk *acp) | 284 | struct xfs_bulkstat_agichunk *acp, |
285 | xfs_agino_t *last_agino) | ||
277 | { | 286 | { |
278 | xfs_ino_t lastino = acp->ac_lastino; | ||
279 | char __user **ubufp = acp->ac_ubuffer; | 287 | char __user **ubufp = acp->ac_ubuffer; |
280 | int ubleft = acp->ac_ubleft; | 288 | int chunkidx; |
281 | int ubelem = acp->ac_ubelem; | ||
282 | int chunkidx, clustidx; | ||
283 | int error = 0; | 289 | int error = 0; |
284 | xfs_agino_t agino; | 290 | xfs_agino_t agino = irbp->ir_startino; |
285 | 291 | ||
286 | for (agino = irbp->ir_startino, chunkidx = clustidx = 0; | 292 | for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; |
287 | XFS_BULKSTAT_UBLEFT(ubleft) && | 293 | chunkidx++, agino++) { |
288 | irbp->ir_freecount < XFS_INODES_PER_CHUNK; | 294 | int fmterror; |
289 | chunkidx++, clustidx++, agino++) { | ||
290 | int fmterror; /* bulkstat formatter result */ | ||
291 | int ubused; | 295 | int ubused; |
292 | xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino); | ||
293 | 296 | ||
294 | ASSERT(chunkidx < XFS_INODES_PER_CHUNK); | 297 | /* inode won't fit in buffer, we are done */ |
298 | if (acp->ac_ubleft < statstruct_size) | ||
299 | break; | ||
295 | 300 | ||
296 | /* Skip if this inode is free */ | 301 | /* Skip if this inode is free */ |
297 | if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { | 302 | if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) |
298 | lastino = ino; | ||
299 | continue; | 303 | continue; |
300 | } | ||
301 | |||
302 | /* | ||
303 | * Count used inodes as free so we can tell when the | ||
304 | * chunk is used up. | ||
305 | */ | ||
306 | irbp->ir_freecount++; | ||
307 | 304 | ||
308 | /* Get the inode and fill in a single buffer */ | 305 | /* Get the inode and fill in a single buffer */ |
309 | ubused = statstruct_size; | 306 | ubused = statstruct_size; |
310 | error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); | 307 | error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), |
311 | if (fmterror == BULKSTAT_RV_NOTHING) { | 308 | *ubufp, acp->ac_ubleft, &ubused, &fmterror); |
312 | if (error && error != -ENOENT && error != -EINVAL) { | 309 | |
313 | ubleft = 0; | 310 | if (fmterror == BULKSTAT_RV_GIVEUP || |
314 | break; | 311 | (error && error != -ENOENT && error != -EINVAL)) { |
315 | } | 312 | acp->ac_ubleft = 0; |
316 | lastino = ino; | ||
317 | continue; | ||
318 | } | ||
319 | if (fmterror == BULKSTAT_RV_GIVEUP) { | ||
320 | ubleft = 0; | ||
321 | ASSERT(error); | 313 | ASSERT(error); |
322 | break; | 314 | break; |
323 | } | 315 | } |
324 | if (*ubufp) | 316 | |
325 | *ubufp += ubused; | 317 | /* be careful not to leak error if at end of chunk */ |
326 | ubleft -= ubused; | 318 | if (fmterror == BULKSTAT_RV_NOTHING || error) { |
327 | ubelem++; | 319 | error = 0; |
328 | lastino = ino; | 320 | continue; |
321 | } | ||
322 | |||
323 | *ubufp += ubused; | ||
324 | acp->ac_ubleft -= ubused; | ||
325 | acp->ac_ubelem++; | ||
329 | } | 326 | } |
330 | 327 | ||
331 | acp->ac_lastino = lastino; | 328 | /* |
332 | acp->ac_ubleft = ubleft; | 329 | * Post-update *last_agino. At this point, agino will always point one |
333 | acp->ac_ubelem = ubelem; | 330 | * inode past the last inode we processed successfully. Hence we |
331 | * substract that inode when setting the *last_agino cursor so that we | ||
332 | * return the correct cookie to userspace. On the next bulkstat call, | ||
333 | * the inode under the lastino cookie will be skipped as we have already | ||
334 | * processed it here. | ||
335 | */ | ||
336 | *last_agino = agino - 1; | ||
334 | 337 | ||
335 | return error; | 338 | return error; |
336 | } | 339 | } |
@@ -353,45 +356,33 @@ xfs_bulkstat( | |||
353 | xfs_agino_t agino; /* inode # in allocation group */ | 356 | xfs_agino_t agino; /* inode # in allocation group */ |
354 | xfs_agnumber_t agno; /* allocation group number */ | 357 | xfs_agnumber_t agno; /* allocation group number */ |
355 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ | 358 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ |
356 | int end_of_ag; /* set if we've seen the ag end */ | ||
357 | int error; /* error code */ | ||
358 | int fmterror;/* bulkstat formatter result */ | ||
359 | int i; /* loop index */ | ||
360 | int icount; /* count of inodes good in irbuf */ | ||
361 | size_t irbsize; /* size of irec buffer in bytes */ | 359 | size_t irbsize; /* size of irec buffer in bytes */ |
362 | xfs_ino_t ino; /* inode number (filesystem) */ | ||
363 | xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ | ||
364 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ | 360 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ |
365 | xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ | ||
366 | xfs_ino_t lastino; /* last inode number returned */ | ||
367 | int nirbuf; /* size of irbuf */ | 361 | int nirbuf; /* size of irbuf */ |
368 | int rval; /* return value error code */ | ||
369 | int tmp; /* result value from btree calls */ | ||
370 | int ubcount; /* size of user's buffer */ | 362 | int ubcount; /* size of user's buffer */ |
371 | int ubleft; /* bytes left in user's buffer */ | 363 | struct xfs_bulkstat_agichunk ac; |
372 | char __user *ubufp; /* pointer into user's buffer */ | 364 | int error = 0; |
373 | int ubelem; /* spaces used in user's buffer */ | ||
374 | 365 | ||
375 | /* | 366 | /* |
376 | * Get the last inode value, see if there's nothing to do. | 367 | * Get the last inode value, see if there's nothing to do. |
377 | */ | 368 | */ |
378 | ino = (xfs_ino_t)*lastinop; | 369 | agno = XFS_INO_TO_AGNO(mp, *lastinop); |
379 | lastino = ino; | 370 | agino = XFS_INO_TO_AGINO(mp, *lastinop); |
380 | agno = XFS_INO_TO_AGNO(mp, ino); | ||
381 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
382 | if (agno >= mp->m_sb.sb_agcount || | 371 | if (agno >= mp->m_sb.sb_agcount || |
383 | ino != XFS_AGINO_TO_INO(mp, agno, agino)) { | 372 | *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { |
384 | *done = 1; | 373 | *done = 1; |
385 | *ubcountp = 0; | 374 | *ubcountp = 0; |
386 | return 0; | 375 | return 0; |
387 | } | 376 | } |
388 | 377 | ||
389 | ubcount = *ubcountp; /* statstruct's */ | 378 | ubcount = *ubcountp; /* statstruct's */ |
390 | ubleft = ubcount * statstruct_size; /* bytes */ | 379 | ac.ac_ubuffer = &ubuffer; |
391 | *ubcountp = ubelem = 0; | 380 | ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; |
381 | ac.ac_ubelem = 0; | ||
382 | |||
383 | *ubcountp = 0; | ||
392 | *done = 0; | 384 | *done = 0; |
393 | fmterror = 0; | 385 | |
394 | ubufp = ubuffer; | ||
395 | irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); | 386 | irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); |
396 | if (!irbuf) | 387 | if (!irbuf) |
397 | return -ENOMEM; | 388 | return -ENOMEM; |
@@ -402,9 +393,13 @@ xfs_bulkstat( | |||
402 | * Loop over the allocation groups, starting from the last | 393 | * Loop over the allocation groups, starting from the last |
403 | * inode returned; 0 means start of the allocation group. | 394 | * inode returned; 0 means start of the allocation group. |
404 | */ | 395 | */ |
405 | rval = 0; | 396 | while (agno < mp->m_sb.sb_agcount) { |
406 | while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { | 397 | struct xfs_inobt_rec_incore *irbp = irbuf; |
407 | cond_resched(); | 398 | struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; |
399 | bool end_of_ag = false; | ||
400 | int icount = 0; | ||
401 | int stat; | ||
402 | |||
408 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | 403 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); |
409 | if (error) | 404 | if (error) |
410 | break; | 405 | break; |
@@ -414,10 +409,6 @@ xfs_bulkstat( | |||
414 | */ | 409 | */ |
415 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, | 410 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, |
416 | XFS_BTNUM_INO); | 411 | XFS_BTNUM_INO); |
417 | irbp = irbuf; | ||
418 | irbufend = irbuf + nirbuf; | ||
419 | end_of_ag = 0; | ||
420 | icount = 0; | ||
421 | if (agino > 0) { | 412 | if (agino > 0) { |
422 | /* | 413 | /* |
423 | * In the middle of an allocation group, we need to get | 414 | * In the middle of an allocation group, we need to get |
@@ -427,22 +418,23 @@ xfs_bulkstat( | |||
427 | 418 | ||
428 | error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); | 419 | error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); |
429 | if (error) | 420 | if (error) |
430 | break; | 421 | goto del_cursor; |
431 | if (icount) { | 422 | if (icount) { |
432 | irbp->ir_startino = r.ir_startino; | 423 | irbp->ir_startino = r.ir_startino; |
433 | irbp->ir_freecount = r.ir_freecount; | 424 | irbp->ir_freecount = r.ir_freecount; |
434 | irbp->ir_free = r.ir_free; | 425 | irbp->ir_free = r.ir_free; |
435 | irbp++; | 426 | irbp++; |
436 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; | ||
437 | } | 427 | } |
438 | /* Increment to the next record */ | 428 | /* Increment to the next record */ |
439 | error = xfs_btree_increment(cur, 0, &tmp); | 429 | error = xfs_btree_increment(cur, 0, &stat); |
440 | } else { | 430 | } else { |
441 | /* Start of ag. Lookup the first inode chunk */ | 431 | /* Start of ag. Lookup the first inode chunk */ |
442 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); | 432 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); |
433 | } | ||
434 | if (error || stat == 0) { | ||
435 | end_of_ag = true; | ||
436 | goto del_cursor; | ||
443 | } | 437 | } |
444 | if (error) | ||
445 | break; | ||
446 | 438 | ||
447 | /* | 439 | /* |
448 | * Loop through inode btree records in this ag, | 440 | * Loop through inode btree records in this ag, |
@@ -451,10 +443,10 @@ xfs_bulkstat( | |||
451 | while (irbp < irbufend && icount < ubcount) { | 443 | while (irbp < irbufend && icount < ubcount) { |
452 | struct xfs_inobt_rec_incore r; | 444 | struct xfs_inobt_rec_incore r; |
453 | 445 | ||
454 | error = xfs_inobt_get_rec(cur, &r, &i); | 446 | error = xfs_inobt_get_rec(cur, &r, &stat); |
455 | if (error || i == 0) { | 447 | if (error || stat == 0) { |
456 | end_of_ag = 1; | 448 | end_of_ag = true; |
457 | break; | 449 | goto del_cursor; |
458 | } | 450 | } |
459 | 451 | ||
460 | /* | 452 | /* |
@@ -469,77 +461,79 @@ xfs_bulkstat( | |||
469 | irbp++; | 461 | irbp++; |
470 | icount += XFS_INODES_PER_CHUNK - r.ir_freecount; | 462 | icount += XFS_INODES_PER_CHUNK - r.ir_freecount; |
471 | } | 463 | } |
472 | /* | 464 | error = xfs_btree_increment(cur, 0, &stat); |
473 | * Set agino to after this chunk and bump the cursor. | 465 | if (error || stat == 0) { |
474 | */ | 466 | end_of_ag = true; |
475 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; | 467 | goto del_cursor; |
476 | error = xfs_btree_increment(cur, 0, &tmp); | 468 | } |
477 | cond_resched(); | 469 | cond_resched(); |
478 | } | 470 | } |
471 | |||
479 | /* | 472 | /* |
480 | * Drop the btree buffers and the agi buffer. | 473 | * Drop the btree buffers and the agi buffer as we can't hold any |
481 | * We can't hold any of the locks these represent | 474 | * of the locks these represent when calling iget. If there is a |
482 | * when calling iget. | 475 | * pending error, then we are done. |
483 | */ | 476 | */ |
477 | del_cursor: | ||
484 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 478 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
485 | xfs_buf_relse(agbp); | 479 | xfs_buf_relse(agbp); |
480 | if (error) | ||
481 | break; | ||
486 | /* | 482 | /* |
487 | * Now format all the good inodes into the user's buffer. | 483 | * Now format all the good inodes into the user's buffer. The |
484 | * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer | ||
485 | * for the next loop iteration. | ||
488 | */ | 486 | */ |
489 | irbufend = irbp; | 487 | irbufend = irbp; |
490 | for (irbp = irbuf; | 488 | for (irbp = irbuf; |
491 | irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { | 489 | irbp < irbufend && ac.ac_ubleft >= statstruct_size; |
492 | struct xfs_bulkstat_agichunk ac; | 490 | irbp++) { |
493 | |||
494 | ac.ac_lastino = lastino; | ||
495 | ac.ac_ubuffer = &ubuffer; | ||
496 | ac.ac_ubleft = ubleft; | ||
497 | ac.ac_ubelem = ubelem; | ||
498 | error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, | 491 | error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, |
499 | formatter, statstruct_size, &ac); | 492 | formatter, statstruct_size, &ac, |
493 | &agino); | ||
500 | if (error) | 494 | if (error) |
501 | rval = error; | 495 | break; |
502 | |||
503 | lastino = ac.ac_lastino; | ||
504 | ubleft = ac.ac_ubleft; | ||
505 | ubelem = ac.ac_ubelem; | ||
506 | 496 | ||
507 | cond_resched(); | 497 | cond_resched(); |
508 | } | 498 | } |
499 | |||
509 | /* | 500 | /* |
510 | * Set up for the next loop iteration. | 501 | * If we've run out of space or had a formatting error, we |
502 | * are now done | ||
511 | */ | 503 | */ |
512 | if (XFS_BULKSTAT_UBLEFT(ubleft)) { | 504 | if (ac.ac_ubleft < statstruct_size || error) |
513 | if (end_of_ag) { | ||
514 | agno++; | ||
515 | agino = 0; | ||
516 | } else | ||
517 | agino = XFS_INO_TO_AGINO(mp, lastino); | ||
518 | } else | ||
519 | break; | 505 | break; |
506 | |||
507 | if (end_of_ag) { | ||
508 | agno++; | ||
509 | agino = 0; | ||
510 | } | ||
520 | } | 511 | } |
521 | /* | 512 | /* |
522 | * Done, we're either out of filesystem or space to put the data. | 513 | * Done, we're either out of filesystem or space to put the data. |
523 | */ | 514 | */ |
524 | kmem_free(irbuf); | 515 | kmem_free(irbuf); |
525 | *ubcountp = ubelem; | 516 | *ubcountp = ac.ac_ubelem; |
517 | |||
526 | /* | 518 | /* |
527 | * Found some inodes, return them now and return the error next time. | 519 | * We found some inodes, so clear the error status and return them. |
520 | * The lastino pointer will point directly at the inode that triggered | ||
521 | * any error that occurred, so on the next call the error will be | ||
522 | * triggered again and propagated to userspace as there will be no | ||
523 | * formatted inodes in the buffer. | ||
528 | */ | 524 | */ |
529 | if (ubelem) | 525 | if (ac.ac_ubelem) |
530 | rval = 0; | 526 | error = 0; |
531 | if (agno >= mp->m_sb.sb_agcount) { | 527 | |
532 | /* | 528 | /* |
533 | * If we ran out of filesystem, mark lastino as off | 529 | * If we ran out of filesystem, lastino will point off the end of |
534 | * the end of the filesystem, so the next call | 530 | * the filesystem so the next call will return immediately. |
535 | * will return immediately. | 531 | */ |
536 | */ | 532 | *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); |
537 | *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0); | 533 | if (agno >= mp->m_sb.sb_agcount) |
538 | *done = 1; | 534 | *done = 1; |
539 | } else | ||
540 | *lastinop = (xfs_ino_t)lastino; | ||
541 | 535 | ||
542 | return rval; | 536 | return error; |
543 | } | 537 | } |
544 | 538 | ||
545 | int | 539 | int |
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index aaed08022eb9..6ea8b3912fa4 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h | |||
@@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, | |||
30 | int *ubused, | 30 | int *ubused, |
31 | int *stat); | 31 | int *stat); |
32 | 32 | ||
33 | struct xfs_bulkstat_agichunk { | ||
34 | xfs_ino_t ac_lastino; /* last inode returned */ | ||
35 | char __user **ac_ubuffer;/* pointer into user's buffer */ | ||
36 | int ac_ubleft; /* bytes left in user's buffer */ | ||
37 | int ac_ubelem; /* spaces used in user's buffer */ | ||
38 | }; | ||
39 | |||
40 | int | ||
41 | xfs_bulkstat_ag_ichunk( | ||
42 | struct xfs_mount *mp, | ||
43 | xfs_agnumber_t agno, | ||
44 | struct xfs_inobt_rec_incore *irbp, | ||
45 | bulkstat_one_pf formatter, | ||
46 | size_t statstruct_size, | ||
47 | struct xfs_bulkstat_agichunk *acp); | ||
48 | |||
49 | /* | 33 | /* |
50 | * Values for stat return value. | 34 | * Values for stat return value. |
51 | */ | 35 | */ |