diff options
Diffstat (limited to 'fs')
111 files changed, 5488 insertions, 2455 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index db5dc1598716..664991afe0c0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -67,6 +67,7 @@ source "fs/quota/Kconfig" | |||
| 67 | 67 | ||
| 68 | source "fs/autofs4/Kconfig" | 68 | source "fs/autofs4/Kconfig" |
| 69 | source "fs/fuse/Kconfig" | 69 | source "fs/fuse/Kconfig" |
| 70 | source "fs/overlayfs/Kconfig" | ||
| 70 | 71 | ||
| 71 | menu "Caches" | 72 | menu "Caches" |
| 72 | 73 | ||
diff --git a/fs/Makefile b/fs/Makefile index 90c88529892b..da0bbb456d3f 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ | |||
| 104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ | 104 | obj-$(CONFIG_AUTOFS4_FS) += autofs4/ |
| 105 | obj-$(CONFIG_ADFS_FS) += adfs/ | 105 | obj-$(CONFIG_ADFS_FS) += adfs/ |
| 106 | obj-$(CONFIG_FUSE_FS) += fuse/ | 106 | obj-$(CONFIG_FUSE_FS) += fuse/ |
| 107 | obj-$(CONFIG_OVERLAY_FS) += overlayfs/ | ||
| 107 | obj-$(CONFIG_UDF_FS) += udf/ | 108 | obj-$(CONFIG_UDF_FS) += udf/ |
| 108 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ | 109 | obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ |
| 109 | obj-$(CONFIG_OMFS_FS) += omfs/ | 110 | obj-$(CONFIG_OMFS_FS) += omfs/ |
| @@ -165,6 +165,15 @@ static struct vfsmount *aio_mnt; | |||
| 165 | static const struct file_operations aio_ring_fops; | 165 | static const struct file_operations aio_ring_fops; |
| 166 | static const struct address_space_operations aio_ctx_aops; | 166 | static const struct address_space_operations aio_ctx_aops; |
| 167 | 167 | ||
| 168 | /* Backing dev info for aio fs. | ||
| 169 | * -no dirty page accounting or writeback happens | ||
| 170 | */ | ||
| 171 | static struct backing_dev_info aio_fs_backing_dev_info = { | ||
| 172 | .name = "aiofs", | ||
| 173 | .state = 0, | ||
| 174 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY, | ||
| 175 | }; | ||
| 176 | |||
| 168 | static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) | 177 | static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) |
| 169 | { | 178 | { |
| 170 | struct qstr this = QSTR_INIT("[aio]", 5); | 179 | struct qstr this = QSTR_INIT("[aio]", 5); |
| @@ -176,6 +185,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) | |||
| 176 | 185 | ||
| 177 | inode->i_mapping->a_ops = &aio_ctx_aops; | 186 | inode->i_mapping->a_ops = &aio_ctx_aops; |
| 178 | inode->i_mapping->private_data = ctx; | 187 | inode->i_mapping->private_data = ctx; |
| 188 | inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info; | ||
| 179 | inode->i_size = PAGE_SIZE * nr_pages; | 189 | inode->i_size = PAGE_SIZE * nr_pages; |
| 180 | 190 | ||
| 181 | path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); | 191 | path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); |
| @@ -220,6 +230,9 @@ static int __init aio_setup(void) | |||
| 220 | if (IS_ERR(aio_mnt)) | 230 | if (IS_ERR(aio_mnt)) |
| 221 | panic("Failed to create aio fs mount."); | 231 | panic("Failed to create aio fs mount."); |
| 222 | 232 | ||
| 233 | if (bdi_init(&aio_fs_backing_dev_info)) | ||
| 234 | panic("Failed to init aio fs backing dev info."); | ||
| 235 | |||
| 223 | kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 236 | kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
| 224 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 237 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
| 225 | 238 | ||
| @@ -281,11 +294,6 @@ static const struct file_operations aio_ring_fops = { | |||
| 281 | .mmap = aio_ring_mmap, | 294 | .mmap = aio_ring_mmap, |
| 282 | }; | 295 | }; |
| 283 | 296 | ||
| 284 | static int aio_set_page_dirty(struct page *page) | ||
| 285 | { | ||
| 286 | return 0; | ||
| 287 | } | ||
| 288 | |||
| 289 | #if IS_ENABLED(CONFIG_MIGRATION) | 297 | #if IS_ENABLED(CONFIG_MIGRATION) |
| 290 | static int aio_migratepage(struct address_space *mapping, struct page *new, | 298 | static int aio_migratepage(struct address_space *mapping, struct page *new, |
| 291 | struct page *old, enum migrate_mode mode) | 299 | struct page *old, enum migrate_mode mode) |
| @@ -357,7 +365,7 @@ out: | |||
| 357 | #endif | 365 | #endif |
| 358 | 366 | ||
| 359 | static const struct address_space_operations aio_ctx_aops = { | 367 | static const struct address_space_operations aio_ctx_aops = { |
| 360 | .set_page_dirty = aio_set_page_dirty, | 368 | .set_page_dirty = __set_page_dirty_no_writeback, |
| 361 | #if IS_ENABLED(CONFIG_MIGRATION) | 369 | #if IS_ENABLED(CONFIG_MIGRATION) |
| 362 | .migratepage = aio_migratepage, | 370 | .migratepage = aio_migratepage, |
| 363 | #endif | 371 | #endif |
| @@ -412,7 +420,6 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
| 412 | pr_debug("pid(%d) page[%d]->count=%d\n", | 420 | pr_debug("pid(%d) page[%d]->count=%d\n", |
| 413 | current->pid, i, page_count(page)); | 421 | current->pid, i, page_count(page)); |
| 414 | SetPageUptodate(page); | 422 | SetPageUptodate(page); |
| 415 | SetPageDirty(page); | ||
| 416 | unlock_page(page); | 423 | unlock_page(page); |
| 417 | 424 | ||
| 418 | ctx->ring_pages[i] = page; | 425 | ctx->ring_pages[i] = page; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index cc9d4114cda0..1d9c9f3754f8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -1585,7 +1585,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
| 1585 | } | 1585 | } |
| 1586 | EXPORT_SYMBOL_GPL(blkdev_write_iter); | 1586 | EXPORT_SYMBOL_GPL(blkdev_write_iter); |
| 1587 | 1587 | ||
| 1588 | static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) | 1588 | ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) |
| 1589 | { | 1589 | { |
| 1590 | struct file *file = iocb->ki_filp; | 1590 | struct file *file = iocb->ki_filp; |
| 1591 | struct inode *bd_inode = file->f_mapping->host; | 1591 | struct inode *bd_inode = file->f_mapping->host; |
| @@ -1599,6 +1599,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) | |||
| 1599 | iov_iter_truncate(to, size); | 1599 | iov_iter_truncate(to, size); |
| 1600 | return generic_file_read_iter(iocb, to); | 1600 | return generic_file_read_iter(iocb, to); |
| 1601 | } | 1601 | } |
| 1602 | EXPORT_SYMBOL_GPL(blkdev_read_iter); | ||
| 1602 | 1603 | ||
| 1603 | /* | 1604 | /* |
| 1604 | * Try to release a page associated with block device when the system | 1605 | * Try to release a page associated with block device when the system |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d3220d31d3cb..dcd9be32ac57 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -1011,8 +1011,6 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | |||
| 1011 | bytes = min(bytes, working_bytes); | 1011 | bytes = min(bytes, working_bytes); |
| 1012 | kaddr = kmap_atomic(page_out); | 1012 | kaddr = kmap_atomic(page_out); |
| 1013 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); | 1013 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); |
| 1014 | if (*pg_index == (vcnt - 1) && *pg_offset == 0) | ||
| 1015 | memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); | ||
| 1016 | kunmap_atomic(kaddr); | 1014 | kunmap_atomic(kaddr); |
| 1017 | flush_dcache_page(page_out); | 1015 | flush_dcache_page(page_out); |
| 1018 | 1016 | ||
| @@ -1054,3 +1052,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | |||
| 1054 | 1052 | ||
| 1055 | return 1; | 1053 | return 1; |
| 1056 | } | 1054 | } |
| 1055 | |||
| 1056 | /* | ||
| 1057 | * When uncompressing data, we need to make sure and zero any parts of | ||
| 1058 | * the biovec that were not filled in by the decompression code. pg_index | ||
| 1059 | * and pg_offset indicate the last page and the last offset of that page | ||
| 1060 | * that have been filled in. This will zero everything remaining in the | ||
| 1061 | * biovec. | ||
| 1062 | */ | ||
| 1063 | void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, | ||
| 1064 | unsigned long pg_index, | ||
| 1065 | unsigned long pg_offset) | ||
| 1066 | { | ||
| 1067 | while (pg_index < vcnt) { | ||
| 1068 | struct page *page = bvec[pg_index].bv_page; | ||
| 1069 | unsigned long off = bvec[pg_index].bv_offset; | ||
| 1070 | unsigned long len = bvec[pg_index].bv_len; | ||
| 1071 | |||
| 1072 | if (pg_offset < off) | ||
| 1073 | pg_offset = off; | ||
| 1074 | if (pg_offset < off + len) { | ||
| 1075 | unsigned long bytes = off + len - pg_offset; | ||
| 1076 | char *kaddr; | ||
| 1077 | |||
| 1078 | kaddr = kmap_atomic(page); | ||
| 1079 | memset(kaddr + pg_offset, 0, bytes); | ||
| 1080 | kunmap_atomic(kaddr); | ||
| 1081 | } | ||
| 1082 | pg_index++; | ||
| 1083 | pg_offset = 0; | ||
| 1084 | } | ||
| 1085 | } | ||
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 0c803b4fbf93..d181f70caae0 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
| @@ -45,7 +45,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
| 45 | unsigned long nr_pages); | 45 | unsigned long nr_pages); |
| 46 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | 46 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
| 47 | int mirror_num, unsigned long bio_flags); | 47 | int mirror_num, unsigned long bio_flags); |
| 48 | 48 | void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, | |
| 49 | unsigned long pg_index, | ||
| 50 | unsigned long pg_offset); | ||
| 49 | struct btrfs_compress_op { | 51 | struct btrfs_compress_op { |
| 50 | struct list_head *(*alloc_workspace)(void); | 52 | struct list_head *(*alloc_workspace)(void); |
| 51 | 53 | ||
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19bc6162fb8e..150822ee0a0b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
| 80 | { | 80 | { |
| 81 | int i; | 81 | int i; |
| 82 | 82 | ||
| 83 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 84 | /* lockdep really cares that we take all of these spinlocks | ||
| 85 | * in the right order. If any of the locks in the path are not | ||
| 86 | * currently blocking, it is going to complain. So, make really | ||
| 87 | * really sure by forcing the path to blocking before we clear | ||
| 88 | * the path blocking. | ||
| 89 | */ | ||
| 90 | if (held) { | 83 | if (held) { |
| 91 | btrfs_set_lock_blocking_rw(held, held_rw); | 84 | btrfs_set_lock_blocking_rw(held, held_rw); |
| 92 | if (held_rw == BTRFS_WRITE_LOCK) | 85 | if (held_rw == BTRFS_WRITE_LOCK) |
| @@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
| 95 | held_rw = BTRFS_READ_LOCK_BLOCKING; | 88 | held_rw = BTRFS_READ_LOCK_BLOCKING; |
| 96 | } | 89 | } |
| 97 | btrfs_set_path_blocking(p); | 90 | btrfs_set_path_blocking(p); |
| 98 | #endif | ||
| 99 | 91 | ||
| 100 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { | 92 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { |
| 101 | if (p->nodes[i] && p->locks[i]) { | 93 | if (p->nodes[i] && p->locks[i]) { |
| @@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
| 107 | } | 99 | } |
| 108 | } | 100 | } |
| 109 | 101 | ||
| 110 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 111 | if (held) | 102 | if (held) |
| 112 | btrfs_clear_lock_blocking_rw(held, held_rw); | 103 | btrfs_clear_lock_blocking_rw(held, held_rw); |
| 113 | #endif | ||
| 114 | } | 104 | } |
| 115 | 105 | ||
| 116 | /* this also releases the path */ | 106 | /* this also releases the path */ |
| @@ -2893,7 +2883,7 @@ cow_done: | |||
| 2893 | } | 2883 | } |
| 2894 | p->locks[level] = BTRFS_WRITE_LOCK; | 2884 | p->locks[level] = BTRFS_WRITE_LOCK; |
| 2895 | } else { | 2885 | } else { |
| 2896 | err = btrfs_try_tree_read_lock(b); | 2886 | err = btrfs_tree_read_lock_atomic(b); |
| 2897 | if (!err) { | 2887 | if (!err) { |
| 2898 | btrfs_set_path_blocking(p); | 2888 | btrfs_set_path_blocking(p); |
| 2899 | btrfs_tree_read_lock(b); | 2889 | btrfs_tree_read_lock(b); |
| @@ -3025,7 +3015,7 @@ again: | |||
| 3025 | } | 3015 | } |
| 3026 | 3016 | ||
| 3027 | level = btrfs_header_level(b); | 3017 | level = btrfs_header_level(b); |
| 3028 | err = btrfs_try_tree_read_lock(b); | 3018 | err = btrfs_tree_read_lock_atomic(b); |
| 3029 | if (!err) { | 3019 | if (!err) { |
| 3030 | btrfs_set_path_blocking(p); | 3020 | btrfs_set_path_blocking(p); |
| 3031 | btrfs_tree_read_lock(b); | 3021 | btrfs_tree_read_lock(b); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d557264ee974..fe69edda11fb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -3276,7 +3276,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 3276 | struct btrfs_root *root, unsigned long count); | 3276 | struct btrfs_root *root, unsigned long count); |
| 3277 | int btrfs_async_run_delayed_refs(struct btrfs_root *root, | 3277 | int btrfs_async_run_delayed_refs(struct btrfs_root *root, |
| 3278 | unsigned long count, int wait); | 3278 | unsigned long count, int wait); |
| 3279 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 3279 | int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); |
| 3280 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | 3280 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, |
| 3281 | struct btrfs_root *root, u64 bytenr, | 3281 | struct btrfs_root *root, u64 bytenr, |
| 3282 | u64 offset, int metadata, u64 *refs, u64 *flags); | 3282 | u64 offset, int metadata, u64 *refs, u64 *flags); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ad0f47ac850..1bf9f897065d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -3817,19 +3817,19 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
| 3817 | struct btrfs_super_block *sb = fs_info->super_copy; | 3817 | struct btrfs_super_block *sb = fs_info->super_copy; |
| 3818 | int ret = 0; | 3818 | int ret = 0; |
| 3819 | 3819 | ||
| 3820 | if (sb->root_level > BTRFS_MAX_LEVEL) { | 3820 | if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { |
| 3821 | printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", | 3821 | printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n", |
| 3822 | sb->root_level, BTRFS_MAX_LEVEL); | 3822 | btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); |
| 3823 | ret = -EINVAL; | 3823 | ret = -EINVAL; |
| 3824 | } | 3824 | } |
| 3825 | if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { | 3825 | if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { |
| 3826 | printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", | 3826 | printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n", |
| 3827 | sb->chunk_root_level, BTRFS_MAX_LEVEL); | 3827 | btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); |
| 3828 | ret = -EINVAL; | 3828 | ret = -EINVAL; |
| 3829 | } | 3829 | } |
| 3830 | if (sb->log_root_level > BTRFS_MAX_LEVEL) { | 3830 | if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { |
| 3831 | printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", | 3831 | printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n", |
| 3832 | sb->log_root_level, BTRFS_MAX_LEVEL); | 3832 | btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); |
| 3833 | ret = -EINVAL; | 3833 | ret = -EINVAL; |
| 3834 | } | 3834 | } |
| 3835 | 3835 | ||
| @@ -3837,15 +3837,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
| 3837 | * The common minimum, we don't know if we can trust the nodesize/sectorsize | 3837 | * The common minimum, we don't know if we can trust the nodesize/sectorsize |
| 3838 | * items yet, they'll be verified later. Issue just a warning. | 3838 | * items yet, they'll be verified later. Issue just a warning. |
| 3839 | */ | 3839 | */ |
| 3840 | if (!IS_ALIGNED(sb->root, 4096)) | 3840 | if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) |
| 3841 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3841 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", |
| 3842 | sb->root); | 3842 | sb->root); |
| 3843 | if (!IS_ALIGNED(sb->chunk_root, 4096)) | 3843 | if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) |
| 3844 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3844 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", |
| 3845 | sb->chunk_root); | 3845 | sb->chunk_root); |
| 3846 | if (!IS_ALIGNED(sb->log_root, 4096)) | 3846 | if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) |
| 3847 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | 3847 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", |
| 3848 | sb->log_root); | 3848 | btrfs_super_log_root(sb)); |
| 3849 | 3849 | ||
| 3850 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { | 3850 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { |
| 3851 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", | 3851 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", |
| @@ -3857,13 +3857,13 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
| 3857 | * Hint to catch really bogus numbers, bitflips or so, more exact checks are | 3857 | * Hint to catch really bogus numbers, bitflips or so, more exact checks are |
| 3858 | * done later | 3858 | * done later |
| 3859 | */ | 3859 | */ |
| 3860 | if (sb->num_devices > (1UL << 31)) | 3860 | if (btrfs_super_num_devices(sb) > (1UL << 31)) |
| 3861 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", | 3861 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", |
| 3862 | sb->num_devices); | 3862 | btrfs_super_num_devices(sb)); |
| 3863 | 3863 | ||
| 3864 | if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { | 3864 | if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { |
| 3865 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", | 3865 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", |
| 3866 | sb->bytenr, BTRFS_SUPER_INFO_OFFSET); | 3866 | btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); |
| 3867 | ret = -EINVAL; | 3867 | ret = -EINVAL; |
| 3868 | } | 3868 | } |
| 3869 | 3869 | ||
| @@ -3871,14 +3871,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | |||
| 3871 | * The generation is a global counter, we'll trust it more than the others | 3871 | * The generation is a global counter, we'll trust it more than the others |
| 3872 | * but it's still possible that it's the one that's wrong. | 3872 | * but it's still possible that it's the one that's wrong. |
| 3873 | */ | 3873 | */ |
| 3874 | if (sb->generation < sb->chunk_root_generation) | 3874 | if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) |
| 3875 | printk(KERN_WARNING | 3875 | printk(KERN_WARNING |
| 3876 | "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", | 3876 | "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", |
| 3877 | sb->generation, sb->chunk_root_generation); | 3877 | btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); |
| 3878 | if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) | 3878 | if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) |
| 3879 | && btrfs_super_cache_generation(sb) != (u64)-1) | ||
| 3879 | printk(KERN_WARNING | 3880 | printk(KERN_WARNING |
| 3880 | "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", | 3881 | "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", |
| 3881 | sb->generation, sb->cache_generation); | 3882 | btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); |
| 3882 | 3883 | ||
| 3883 | return ret; | 3884 | return ret; |
| 3884 | } | 3885 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d56589571012..47c1ba141082 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -710,8 +710,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | |||
| 710 | rcu_read_unlock(); | 710 | rcu_read_unlock(); |
| 711 | } | 711 | } |
| 712 | 712 | ||
| 713 | /* simple helper to search for an existing extent at a given offset */ | 713 | /* simple helper to search for an existing data extent at a given offset */ |
| 714 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | 714 | int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len) |
| 715 | { | 715 | { |
| 716 | int ret; | 716 | int ret; |
| 717 | struct btrfs_key key; | 717 | struct btrfs_key key; |
| @@ -726,12 +726,6 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 726 | key.type = BTRFS_EXTENT_ITEM_KEY; | 726 | key.type = BTRFS_EXTENT_ITEM_KEY; |
| 727 | ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, | 727 | ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, |
| 728 | 0, 0); | 728 | 0, 0); |
| 729 | if (ret > 0) { | ||
| 730 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 731 | if (key.objectid == start && | ||
| 732 | key.type == BTRFS_METADATA_ITEM_KEY) | ||
| 733 | ret = 0; | ||
| 734 | } | ||
| 735 | btrfs_free_path(path); | 729 | btrfs_free_path(path); |
| 736 | return ret; | 730 | return ret; |
| 737 | } | 731 | } |
| @@ -786,7 +780,6 @@ search_again: | |||
| 786 | else | 780 | else |
| 787 | key.type = BTRFS_EXTENT_ITEM_KEY; | 781 | key.type = BTRFS_EXTENT_ITEM_KEY; |
| 788 | 782 | ||
| 789 | again: | ||
| 790 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | 783 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, |
| 791 | &key, path, 0, 0); | 784 | &key, path, 0, 0); |
| 792 | if (ret < 0) | 785 | if (ret < 0) |
| @@ -802,13 +795,6 @@ again: | |||
| 802 | key.offset == root->nodesize) | 795 | key.offset == root->nodesize) |
| 803 | ret = 0; | 796 | ret = 0; |
| 804 | } | 797 | } |
| 805 | if (ret) { | ||
| 806 | key.objectid = bytenr; | ||
| 807 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 808 | key.offset = root->nodesize; | ||
| 809 | btrfs_release_path(path); | ||
| 810 | goto again; | ||
| 811 | } | ||
| 812 | } | 798 | } |
| 813 | 799 | ||
| 814 | if (ret == 0) { | 800 | if (ret == 0) { |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 783a94355efd..84a2d1868271 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 413 | ret = 0; | 413 | ret = 0; |
| 414 | fail: | 414 | fail: |
| 415 | while (ret < 0 && !list_empty(&tmplist)) { | 415 | while (ret < 0 && !list_empty(&tmplist)) { |
| 416 | sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); | 416 | sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); |
| 417 | list_del(&sums->list); | 417 | list_del(&sums->list); |
| 418 | kfree(sums); | 418 | kfree(sums); |
| 419 | } | 419 | } |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d2b76e29d3b..4399f0c3a4ce 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -765,23 +765,6 @@ out: | |||
| 765 | return ret; | 765 | return ret; |
| 766 | } | 766 | } |
| 767 | 767 | ||
| 768 | /* copy of check_sticky in fs/namei.c() | ||
| 769 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
| 770 | * minimal. | ||
| 771 | */ | ||
| 772 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | ||
| 773 | { | ||
| 774 | kuid_t fsuid = current_fsuid(); | ||
| 775 | |||
| 776 | if (!(dir->i_mode & S_ISVTX)) | ||
| 777 | return 0; | ||
| 778 | if (uid_eq(inode->i_uid, fsuid)) | ||
| 779 | return 0; | ||
| 780 | if (uid_eq(dir->i_uid, fsuid)) | ||
| 781 | return 0; | ||
| 782 | return !capable(CAP_FOWNER); | ||
| 783 | } | ||
| 784 | |||
| 785 | /* copy of may_delete in fs/namei.c() | 768 | /* copy of may_delete in fs/namei.c() |
| 786 | * Check whether we can remove a link victim from directory dir, check | 769 | * Check whether we can remove a link victim from directory dir, check |
| 787 | * whether the type of victim is right. | 770 | * whether the type of victim is right. |
| @@ -817,8 +800,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) | |||
| 817 | return error; | 800 | return error; |
| 818 | if (IS_APPEND(dir)) | 801 | if (IS_APPEND(dir)) |
| 819 | return -EPERM; | 802 | return -EPERM; |
| 820 | if (btrfs_check_sticky(dir, victim->d_inode)|| | 803 | if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) || |
| 821 | IS_APPEND(victim->d_inode)|| | ||
| 822 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) | 804 | IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) |
| 823 | return -EPERM; | 805 | return -EPERM; |
| 824 | if (isdir) { | 806 | if (isdir) { |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 5665d2149249..f8229ef1b46d 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
| @@ -128,6 +128,26 @@ again: | |||
| 128 | } | 128 | } |
| 129 | 129 | ||
| 130 | /* | 130 | /* |
| 131 | * take a spinning read lock. | ||
| 132 | * returns 1 if we get the read lock and 0 if we don't | ||
| 133 | * this won't wait for blocking writers | ||
| 134 | */ | ||
| 135 | int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) | ||
| 136 | { | ||
| 137 | if (atomic_read(&eb->blocking_writers)) | ||
| 138 | return 0; | ||
| 139 | |||
| 140 | read_lock(&eb->lock); | ||
| 141 | if (atomic_read(&eb->blocking_writers)) { | ||
| 142 | read_unlock(&eb->lock); | ||
| 143 | return 0; | ||
| 144 | } | ||
| 145 | atomic_inc(&eb->read_locks); | ||
| 146 | atomic_inc(&eb->spinning_readers); | ||
| 147 | return 1; | ||
| 148 | } | ||
| 149 | |||
| 150 | /* | ||
| 131 | * returns 1 if we get the read lock and 0 if we don't | 151 | * returns 1 if we get the read lock and 0 if we don't |
| 132 | * this won't wait for blocking writers | 152 | * this won't wait for blocking writers |
| 133 | */ | 153 | */ |
| @@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) | |||
| 158 | atomic_read(&eb->blocking_readers)) | 178 | atomic_read(&eb->blocking_readers)) |
| 159 | return 0; | 179 | return 0; |
| 160 | 180 | ||
| 161 | if (!write_trylock(&eb->lock)) | 181 | write_lock(&eb->lock); |
| 162 | return 0; | ||
| 163 | |||
| 164 | if (atomic_read(&eb->blocking_writers) || | 182 | if (atomic_read(&eb->blocking_writers) || |
| 165 | atomic_read(&eb->blocking_readers)) { | 183 | atomic_read(&eb->blocking_readers)) { |
| 166 | write_unlock(&eb->lock); | 184 | write_unlock(&eb->lock); |
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index b81e0e9a4894..c44a9d5f5362 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h | |||
| @@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); | |||
| 35 | void btrfs_assert_tree_locked(struct extent_buffer *eb); | 35 | void btrfs_assert_tree_locked(struct extent_buffer *eb); |
| 36 | int btrfs_try_tree_read_lock(struct extent_buffer *eb); | 36 | int btrfs_try_tree_read_lock(struct extent_buffer *eb); |
| 37 | int btrfs_try_tree_write_lock(struct extent_buffer *eb); | 37 | int btrfs_try_tree_write_lock(struct extent_buffer *eb); |
| 38 | int btrfs_tree_read_lock_atomic(struct extent_buffer *eb); | ||
| 39 | |||
| 38 | 40 | ||
| 39 | static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) | 41 | static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) |
| 40 | { | 42 | { |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 78285f30909e..617553cdb7d3 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
| @@ -373,6 +373,8 @@ cont: | |||
| 373 | } | 373 | } |
| 374 | done: | 374 | done: |
| 375 | kunmap(pages_in[page_in_index]); | 375 | kunmap(pages_in[page_in_index]); |
| 376 | if (!ret) | ||
| 377 | btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); | ||
| 376 | return ret; | 378 | return ret; |
| 377 | } | 379 | } |
| 378 | 380 | ||
| @@ -410,10 +412,23 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 410 | goto out; | 412 | goto out; |
| 411 | } | 413 | } |
| 412 | 414 | ||
| 415 | /* | ||
| 416 | * the caller is already checking against PAGE_SIZE, but lets | ||
| 417 | * move this check closer to the memcpy/memset | ||
| 418 | */ | ||
| 419 | destlen = min_t(unsigned long, destlen, PAGE_SIZE); | ||
| 413 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | 420 | bytes = min_t(unsigned long, destlen, out_len - start_byte); |
| 414 | 421 | ||
| 415 | kaddr = kmap_atomic(dest_page); | 422 | kaddr = kmap_atomic(dest_page); |
| 416 | memcpy(kaddr, workspace->buf + start_byte, bytes); | 423 | memcpy(kaddr, workspace->buf + start_byte, bytes); |
| 424 | |||
| 425 | /* | ||
| 426 | * btrfs_getblock is doing a zero on the tail of the page too, | ||
| 427 | * but this will cover anything missing from the decompressed | ||
| 428 | * data. | ||
| 429 | */ | ||
| 430 | if (bytes < destlen) | ||
| 431 | memset(kaddr+bytes, 0, destlen-bytes); | ||
| 417 | kunmap_atomic(kaddr); | 432 | kunmap_atomic(kaddr); |
| 418 | out: | 433 | out: |
| 419 | return ret; | 434 | return ret; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a2b97ef10317..54bd91ece35b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -2151,6 +2151,7 @@ static void __exit exit_btrfs_fs(void) | |||
| 2151 | extent_map_exit(); | 2151 | extent_map_exit(); |
| 2152 | extent_io_exit(); | 2152 | extent_io_exit(); |
| 2153 | btrfs_interface_exit(); | 2153 | btrfs_interface_exit(); |
| 2154 | btrfs_end_io_wq_exit(); | ||
| 2154 | unregister_filesystem(&btrfs_fs_type); | 2155 | unregister_filesystem(&btrfs_fs_type); |
| 2155 | btrfs_exit_sysfs(); | 2156 | btrfs_exit_sysfs(); |
| 2156 | btrfs_cleanup_fs_uuids(); | 2157 | btrfs_cleanup_fs_uuids(); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1475979e5718..286213cec861 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -672,7 +672,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 672 | * is this extent already allocated in the extent | 672 | * is this extent already allocated in the extent |
| 673 | * allocation tree? If so, just add a reference | 673 | * allocation tree? If so, just add a reference |
| 674 | */ | 674 | */ |
| 675 | ret = btrfs_lookup_extent(root, ins.objectid, | 675 | ret = btrfs_lookup_data_extent(root, ins.objectid, |
| 676 | ins.offset); | 676 | ins.offset); |
| 677 | if (ret == 0) { | 677 | if (ret == 0) { |
| 678 | ret = btrfs_inc_extent_ref(trans, root, | 678 | ret = btrfs_inc_extent_ref(trans, root, |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 759fa4e2de8f..fb22fd8d8fb8 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
| @@ -299,6 +299,8 @@ done: | |||
| 299 | zlib_inflateEnd(&workspace->strm); | 299 | zlib_inflateEnd(&workspace->strm); |
| 300 | if (data_in) | 300 | if (data_in) |
| 301 | kunmap(pages_in[page_in_index]); | 301 | kunmap(pages_in[page_in_index]); |
| 302 | if (!ret) | ||
| 303 | btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); | ||
| 302 | return ret; | 304 | return ret; |
| 303 | } | 305 | } |
| 304 | 306 | ||
| @@ -310,10 +312,14 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 310 | struct workspace *workspace = list_entry(ws, struct workspace, list); | 312 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
| 311 | int ret = 0; | 313 | int ret = 0; |
| 312 | int wbits = MAX_WBITS; | 314 | int wbits = MAX_WBITS; |
| 313 | unsigned long bytes_left = destlen; | 315 | unsigned long bytes_left; |
| 314 | unsigned long total_out = 0; | 316 | unsigned long total_out = 0; |
| 317 | unsigned long pg_offset = 0; | ||
| 315 | char *kaddr; | 318 | char *kaddr; |
| 316 | 319 | ||
| 320 | destlen = min_t(unsigned long, destlen, PAGE_SIZE); | ||
| 321 | bytes_left = destlen; | ||
| 322 | |||
| 317 | workspace->strm.next_in = data_in; | 323 | workspace->strm.next_in = data_in; |
| 318 | workspace->strm.avail_in = srclen; | 324 | workspace->strm.avail_in = srclen; |
| 319 | workspace->strm.total_in = 0; | 325 | workspace->strm.total_in = 0; |
| @@ -341,7 +347,6 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 341 | unsigned long buf_start; | 347 | unsigned long buf_start; |
| 342 | unsigned long buf_offset; | 348 | unsigned long buf_offset; |
| 343 | unsigned long bytes; | 349 | unsigned long bytes; |
| 344 | unsigned long pg_offset = 0; | ||
| 345 | 350 | ||
| 346 | ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); | 351 | ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); |
| 347 | if (ret != Z_OK && ret != Z_STREAM_END) | 352 | if (ret != Z_OK && ret != Z_STREAM_END) |
| @@ -384,6 +389,17 @@ next: | |||
| 384 | ret = 0; | 389 | ret = 0; |
| 385 | 390 | ||
| 386 | zlib_inflateEnd(&workspace->strm); | 391 | zlib_inflateEnd(&workspace->strm); |
| 392 | |||
| 393 | /* | ||
| 394 | * this should only happen if zlib returned fewer bytes than we | ||
| 395 | * expected. btrfs_get_block is responsible for zeroing from the | ||
| 396 | * end of the inline extent (destlen) to the end of the page | ||
| 397 | */ | ||
| 398 | if (pg_offset < destlen) { | ||
| 399 | kaddr = kmap_atomic(dest_page); | ||
| 400 | memset(kaddr + pg_offset, 0, destlen - pg_offset); | ||
| 401 | kunmap_atomic(kaddr); | ||
| 402 | } | ||
| 387 | return ret; | 403 | return ret; |
| 388 | } | 404 | } |
| 389 | 405 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index 9614adc7e754..20805db2c987 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -128,21 +128,15 @@ __clear_page_buffers(struct page *page) | |||
| 128 | page_cache_release(page); | 128 | page_cache_release(page); |
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | 131 | static void buffer_io_error(struct buffer_head *bh, char *msg) | |
| 132 | static int quiet_error(struct buffer_head *bh) | ||
| 133 | { | ||
| 134 | if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit()) | ||
| 135 | return 0; | ||
| 136 | return 1; | ||
| 137 | } | ||
| 138 | |||
| 139 | |||
| 140 | static void buffer_io_error(struct buffer_head *bh) | ||
| 141 | { | 132 | { |
| 142 | char b[BDEVNAME_SIZE]; | 133 | char b[BDEVNAME_SIZE]; |
| 143 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", | 134 | |
| 135 | if (!test_bit(BH_Quiet, &bh->b_state)) | ||
| 136 | printk_ratelimited(KERN_ERR | ||
| 137 | "Buffer I/O error on dev %s, logical block %llu%s\n", | ||
| 144 | bdevname(bh->b_bdev, b), | 138 | bdevname(bh->b_bdev, b), |
| 145 | (unsigned long long)bh->b_blocknr); | 139 | (unsigned long long)bh->b_blocknr, msg); |
| 146 | } | 140 | } |
| 147 | 141 | ||
| 148 | /* | 142 | /* |
| @@ -177,17 +171,10 @@ EXPORT_SYMBOL(end_buffer_read_sync); | |||
| 177 | 171 | ||
| 178 | void end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 172 | void end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
| 179 | { | 173 | { |
| 180 | char b[BDEVNAME_SIZE]; | ||
| 181 | |||
| 182 | if (uptodate) { | 174 | if (uptodate) { |
| 183 | set_buffer_uptodate(bh); | 175 | set_buffer_uptodate(bh); |
| 184 | } else { | 176 | } else { |
| 185 | if (!quiet_error(bh)) { | 177 | buffer_io_error(bh, ", lost sync page write"); |
| 186 | buffer_io_error(bh); | ||
| 187 | printk(KERN_WARNING "lost page write due to " | ||
| 188 | "I/O error on %s\n", | ||
| 189 | bdevname(bh->b_bdev, b)); | ||
| 190 | } | ||
| 191 | set_buffer_write_io_error(bh); | 178 | set_buffer_write_io_error(bh); |
| 192 | clear_buffer_uptodate(bh); | 179 | clear_buffer_uptodate(bh); |
| 193 | } | 180 | } |
| @@ -304,8 +291,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
| 304 | set_buffer_uptodate(bh); | 291 | set_buffer_uptodate(bh); |
| 305 | } else { | 292 | } else { |
| 306 | clear_buffer_uptodate(bh); | 293 | clear_buffer_uptodate(bh); |
| 307 | if (!quiet_error(bh)) | 294 | buffer_io_error(bh, ", async page read"); |
| 308 | buffer_io_error(bh); | ||
| 309 | SetPageError(page); | 295 | SetPageError(page); |
| 310 | } | 296 | } |
| 311 | 297 | ||
| @@ -353,7 +339,6 @@ still_busy: | |||
| 353 | */ | 339 | */ |
| 354 | void end_buffer_async_write(struct buffer_head *bh, int uptodate) | 340 | void end_buffer_async_write(struct buffer_head *bh, int uptodate) |
| 355 | { | 341 | { |
| 356 | char b[BDEVNAME_SIZE]; | ||
| 357 | unsigned long flags; | 342 | unsigned long flags; |
| 358 | struct buffer_head *first; | 343 | struct buffer_head *first; |
| 359 | struct buffer_head *tmp; | 344 | struct buffer_head *tmp; |
| @@ -365,12 +350,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |||
| 365 | if (uptodate) { | 350 | if (uptodate) { |
| 366 | set_buffer_uptodate(bh); | 351 | set_buffer_uptodate(bh); |
| 367 | } else { | 352 | } else { |
| 368 | if (!quiet_error(bh)) { | 353 | buffer_io_error(bh, ", lost async page write"); |
| 369 | buffer_io_error(bh); | ||
| 370 | printk(KERN_WARNING "lost page write due to " | ||
| 371 | "I/O error on %s\n", | ||
| 372 | bdevname(bh->b_bdev, b)); | ||
| 373 | } | ||
| 374 | set_bit(AS_EIO, &page->mapping->flags); | 354 | set_bit(AS_EIO, &page->mapping->flags); |
| 375 | set_buffer_write_io_error(bh); | 355 | set_buffer_write_io_error(bh); |
| 376 | clear_buffer_uptodate(bh); | 356 | clear_buffer_uptodate(bh); |
| @@ -993,7 +973,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, | |||
| 993 | */ | 973 | */ |
| 994 | static int | 974 | static int |
| 995 | grow_dev_page(struct block_device *bdev, sector_t block, | 975 | grow_dev_page(struct block_device *bdev, sector_t block, |
| 996 | pgoff_t index, int size, int sizebits) | 976 | pgoff_t index, int size, int sizebits, gfp_t gfp) |
| 997 | { | 977 | { |
| 998 | struct inode *inode = bdev->bd_inode; | 978 | struct inode *inode = bdev->bd_inode; |
| 999 | struct page *page; | 979 | struct page *page; |
| @@ -1002,8 +982,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
| 1002 | int ret = 0; /* Will call free_more_memory() */ | 982 | int ret = 0; /* Will call free_more_memory() */ |
| 1003 | gfp_t gfp_mask; | 983 | gfp_t gfp_mask; |
| 1004 | 984 | ||
| 1005 | gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; | 985 | gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; |
| 1006 | gfp_mask |= __GFP_MOVABLE; | 986 | |
| 1007 | /* | 987 | /* |
| 1008 | * XXX: __getblk_slow() can not really deal with failure and | 988 | * XXX: __getblk_slow() can not really deal with failure and |
| 1009 | * will endlessly loop on improvised global reclaim. Prefer | 989 | * will endlessly loop on improvised global reclaim. Prefer |
| @@ -1060,7 +1040,7 @@ failed: | |||
| 1060 | * that page was dirty, the buffers are set dirty also. | 1040 | * that page was dirty, the buffers are set dirty also. |
| 1061 | */ | 1041 | */ |
| 1062 | static int | 1042 | static int |
| 1063 | grow_buffers(struct block_device *bdev, sector_t block, int size) | 1043 | grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) |
| 1064 | { | 1044 | { |
| 1065 | pgoff_t index; | 1045 | pgoff_t index; |
| 1066 | int sizebits; | 1046 | int sizebits; |
| @@ -1087,11 +1067,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) | |||
| 1087 | } | 1067 | } |
| 1088 | 1068 | ||
| 1089 | /* Create a page with the proper size buffers.. */ | 1069 | /* Create a page with the proper size buffers.. */ |
| 1090 | return grow_dev_page(bdev, block, index, size, sizebits); | 1070 | return grow_dev_page(bdev, block, index, size, sizebits, gfp); |
| 1091 | } | 1071 | } |
| 1092 | 1072 | ||
| 1093 | static struct buffer_head * | 1073 | struct buffer_head * |
| 1094 | __getblk_slow(struct block_device *bdev, sector_t block, int size) | 1074 | __getblk_slow(struct block_device *bdev, sector_t block, |
| 1075 | unsigned size, gfp_t gfp) | ||
| 1095 | { | 1076 | { |
| 1096 | /* Size must be multiple of hard sectorsize */ | 1077 | /* Size must be multiple of hard sectorsize */ |
| 1097 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || | 1078 | if (unlikely(size & (bdev_logical_block_size(bdev)-1) || |
| @@ -1113,13 +1094,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) | |||
| 1113 | if (bh) | 1094 | if (bh) |
| 1114 | return bh; | 1095 | return bh; |
| 1115 | 1096 | ||
| 1116 | ret = grow_buffers(bdev, block, size); | 1097 | ret = grow_buffers(bdev, block, size, gfp); |
| 1117 | if (ret < 0) | 1098 | if (ret < 0) |
| 1118 | return NULL; | 1099 | return NULL; |
| 1119 | if (ret == 0) | 1100 | if (ret == 0) |
| 1120 | free_more_memory(); | 1101 | free_more_memory(); |
| 1121 | } | 1102 | } |
| 1122 | } | 1103 | } |
| 1104 | EXPORT_SYMBOL(__getblk_slow); | ||
| 1123 | 1105 | ||
| 1124 | /* | 1106 | /* |
| 1125 | * The relationship between dirty buffers and dirty pages: | 1107 | * The relationship between dirty buffers and dirty pages: |
| @@ -1373,24 +1355,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) | |||
| 1373 | EXPORT_SYMBOL(__find_get_block); | 1355 | EXPORT_SYMBOL(__find_get_block); |
| 1374 | 1356 | ||
| 1375 | /* | 1357 | /* |
| 1376 | * __getblk will locate (and, if necessary, create) the buffer_head | 1358 | * __getblk_gfp() will locate (and, if necessary, create) the buffer_head |
| 1377 | * which corresponds to the passed block_device, block and size. The | 1359 | * which corresponds to the passed block_device, block and size. The |
| 1378 | * returned buffer has its reference count incremented. | 1360 | * returned buffer has its reference count incremented. |
| 1379 | * | 1361 | * |
| 1380 | * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() | 1362 | * __getblk_gfp() will lock up the machine if grow_dev_page's |
| 1381 | * attempt is failing. FIXME, perhaps? | 1363 | * try_to_free_buffers() attempt is failing. FIXME, perhaps? |
| 1382 | */ | 1364 | */ |
| 1383 | struct buffer_head * | 1365 | struct buffer_head * |
| 1384 | __getblk(struct block_device *bdev, sector_t block, unsigned size) | 1366 | __getblk_gfp(struct block_device *bdev, sector_t block, |
| 1367 | unsigned size, gfp_t gfp) | ||
| 1385 | { | 1368 | { |
| 1386 | struct buffer_head *bh = __find_get_block(bdev, block, size); | 1369 | struct buffer_head *bh = __find_get_block(bdev, block, size); |
| 1387 | 1370 | ||
| 1388 | might_sleep(); | 1371 | might_sleep(); |
| 1389 | if (bh == NULL) | 1372 | if (bh == NULL) |
| 1390 | bh = __getblk_slow(bdev, block, size); | 1373 | bh = __getblk_slow(bdev, block, size, gfp); |
| 1391 | return bh; | 1374 | return bh; |
| 1392 | } | 1375 | } |
| 1393 | EXPORT_SYMBOL(__getblk); | 1376 | EXPORT_SYMBOL(__getblk_gfp); |
| 1394 | 1377 | ||
| 1395 | /* | 1378 | /* |
| 1396 | * Do async read-ahead on a buffer.. | 1379 | * Do async read-ahead on a buffer.. |
| @@ -1406,24 +1389,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) | |||
| 1406 | EXPORT_SYMBOL(__breadahead); | 1389 | EXPORT_SYMBOL(__breadahead); |
| 1407 | 1390 | ||
| 1408 | /** | 1391 | /** |
| 1409 | * __bread() - reads a specified block and returns the bh | 1392 | * __bread_gfp() - reads a specified block and returns the bh |
| 1410 | * @bdev: the block_device to read from | 1393 | * @bdev: the block_device to read from |
| 1411 | * @block: number of block | 1394 | * @block: number of block |
| 1412 | * @size: size (in bytes) to read | 1395 | * @size: size (in bytes) to read |
| 1413 | * | 1396 | * @gfp: page allocation flag |
| 1397 | * | ||
| 1414 | * Reads a specified block, and returns buffer head that contains it. | 1398 | * Reads a specified block, and returns buffer head that contains it. |
| 1399 | * The page cache can be allocated from non-movable area | ||
| 1400 | * not to prevent page migration if you set gfp to zero. | ||
| 1415 | * It returns NULL if the block was unreadable. | 1401 | * It returns NULL if the block was unreadable. |
| 1416 | */ | 1402 | */ |
| 1417 | struct buffer_head * | 1403 | struct buffer_head * |
| 1418 | __bread(struct block_device *bdev, sector_t block, unsigned size) | 1404 | __bread_gfp(struct block_device *bdev, sector_t block, |
| 1405 | unsigned size, gfp_t gfp) | ||
| 1419 | { | 1406 | { |
| 1420 | struct buffer_head *bh = __getblk(bdev, block, size); | 1407 | struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); |
| 1421 | 1408 | ||
| 1422 | if (likely(bh) && !buffer_uptodate(bh)) | 1409 | if (likely(bh) && !buffer_uptodate(bh)) |
| 1423 | bh = __bread_slow(bh); | 1410 | bh = __bread_slow(bh); |
| 1424 | return bh; | 1411 | return bh; |
| 1425 | } | 1412 | } |
| 1426 | EXPORT_SYMBOL(__bread); | 1413 | EXPORT_SYMBOL(__bread_gfp); |
| 1427 | 1414 | ||
| 1428 | /* | 1415 | /* |
| 1429 | * invalidate_bh_lrus() is called rarely - but not only at unmount. | 1416 | * invalidate_bh_lrus() is called rarely - but not only at unmount. |
| @@ -2082,6 +2069,7 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
| 2082 | struct page *page, void *fsdata) | 2069 | struct page *page, void *fsdata) |
| 2083 | { | 2070 | { |
| 2084 | struct inode *inode = mapping->host; | 2071 | struct inode *inode = mapping->host; |
| 2072 | loff_t old_size = inode->i_size; | ||
| 2085 | int i_size_changed = 0; | 2073 | int i_size_changed = 0; |
| 2086 | 2074 | ||
| 2087 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 2075 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
| @@ -2101,6 +2089,8 @@ int generic_write_end(struct file *file, struct address_space *mapping, | |||
| 2101 | unlock_page(page); | 2089 | unlock_page(page); |
| 2102 | page_cache_release(page); | 2090 | page_cache_release(page); |
| 2103 | 2091 | ||
| 2092 | if (old_size < pos) | ||
| 2093 | pagecache_isize_extended(inode, old_size, pos); | ||
| 2104 | /* | 2094 | /* |
| 2105 | * Don't mark the inode dirty under page lock. First, it unnecessarily | 2095 | * Don't mark the inode dirty under page lock. First, it unnecessarily |
| 2106 | * makes the holding time of page lock longer. Second, it forces lock | 2096 | * makes the holding time of page lock longer. Second, it forces lock |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 659f2ea9e6f7..cefca661464b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
| 2638 | 2638 | ||
| 2639 | for (i = 0; i < CEPH_CAP_BITS; i++) | 2639 | for (i = 0; i < CEPH_CAP_BITS; i++) |
| 2640 | if ((dirty & (1 << i)) && | 2640 | if ((dirty & (1 << i)) && |
| 2641 | flush_tid == ci->i_cap_flush_tid[i]) | 2641 | (u16)flush_tid == ci->i_cap_flush_tid[i]) |
| 2642 | cleaned |= 1 << i; | 2642 | cleaned |= 1 << i; |
| 2643 | 2643 | ||
| 2644 | dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," | 2644 | dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," |
diff --git a/fs/dcache.c b/fs/dcache.c index d5a23fd0da90..5bc72b07fde2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -778,6 +778,7 @@ restart: | |||
| 778 | struct dentry *parent = lock_parent(dentry); | 778 | struct dentry *parent = lock_parent(dentry); |
| 779 | if (likely(!dentry->d_lockref.count)) { | 779 | if (likely(!dentry->d_lockref.count)) { |
| 780 | __dentry_kill(dentry); | 780 | __dentry_kill(dentry); |
| 781 | dput(parent); | ||
| 781 | goto restart; | 782 | goto restart; |
| 782 | } | 783 | } |
| 783 | if (parent) | 784 | if (parent) |
| @@ -2673,11 +2674,13 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
| 2673 | if (!IS_ROOT(new)) { | 2674 | if (!IS_ROOT(new)) { |
| 2674 | spin_unlock(&inode->i_lock); | 2675 | spin_unlock(&inode->i_lock); |
| 2675 | dput(new); | 2676 | dput(new); |
| 2677 | iput(inode); | ||
| 2676 | return ERR_PTR(-EIO); | 2678 | return ERR_PTR(-EIO); |
| 2677 | } | 2679 | } |
| 2678 | if (d_ancestor(new, dentry)) { | 2680 | if (d_ancestor(new, dentry)) { |
| 2679 | spin_unlock(&inode->i_lock); | 2681 | spin_unlock(&inode->i_lock); |
| 2680 | dput(new); | 2682 | dput(new); |
| 2683 | iput(inode); | ||
| 2681 | return ERR_PTR(-EIO); | 2684 | return ERR_PTR(-EIO); |
| 2682 | } | 2685 | } |
| 2683 | write_seqlock(&rename_lock); | 2686 | write_seqlock(&rename_lock); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1b119d3bf924..c4cd1fd86cc2 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
| @@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
| 566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
| 567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; | 567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; |
| 568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; | 568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; |
| 569 | s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; | ||
| 570 | |||
| 571 | rc = -EINVAL; | ||
| 572 | if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { | ||
| 573 | pr_err("eCryptfs: maximum fs stacking depth exceeded\n"); | ||
| 574 | goto out_free; | ||
| 575 | } | ||
| 569 | 576 | ||
| 570 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); | 577 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); |
| 571 | rc = PTR_ERR(inode); | 578 | rc = PTR_ERR(inode); |
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index 389ba8312d5d..b47c7b8dc275 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | # Copyright (C) 2008 Panasas Inc. All rights reserved. | 4 | # Copyright (C) 2008 Panasas Inc. All rights reserved. |
| 5 | # | 5 | # |
| 6 | # Authors: | 6 | # Authors: |
| 7 | # Boaz Harrosh <bharrosh@panasas.com> | 7 | # Boaz Harrosh <ooo@electrozaur.com> |
| 8 | # | 8 | # |
| 9 | # This program is free software; you can redistribute it and/or modify | 9 | # This program is free software; you can redistribute it and/or modify |
| 10 | # it under the terms of the GNU General Public License version 2 | 10 | # it under the terms of the GNU General Public License version 2 |
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index 3bbd46956d77..7d88ef566213 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | * Copyright (C) 2005, 2006 | 4 | * Copyright (C) 2005, 2006 |
| 5 | * Avishay Traeger (avishay@gmail.com) | 5 | * Avishay Traeger (avishay@gmail.com) |
| 6 | * Copyright (C) 2008, 2009 | 6 | * Copyright (C) 2008, 2009 |
| 7 | * Boaz Harrosh <bharrosh@panasas.com> | 7 | * Boaz Harrosh <ooo@electrozaur.com> |
| 8 | * | 8 | * |
| 9 | * Copyrights for code taken from ext2: | 9 | * Copyrights for code taken from ext2: |
| 10 | * Copyright (C) 1992, 1993, 1994, 1995 | 10 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 49f51ab4caac..d7defd557601 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
| 8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index fffe86fd7a42..ad9cac670a47 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
| 8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 71bf8e4fb5d4..1a376b42d305 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
| 8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 3f9cafd73931..f1d3d4eb8c4f 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
| 8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 4731fd991efe..28907460e8fa 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
| 8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index cfc0205d62c4..7bd8ac8dfb28 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * This file is part of exofs. | 7 | * This file is part of exofs. |
| 8 | * | 8 | * |
| @@ -29,7 +29,7 @@ | |||
| 29 | 29 | ||
| 30 | #include "ore_raid.h" | 30 | #include "ore_raid.h" |
| 31 | 31 | ||
| 32 | MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); | 32 | MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>"); |
| 33 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); | 33 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); |
| 34 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
| 35 | 35 | ||
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 84529b8a331b..27cbdb697649 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2011 | 2 | * Copyright (C) 2011 |
| 3 | * Boaz Harrosh <bharrosh@panasas.com> | 3 | * Boaz Harrosh <ooo@electrozaur.com> |
| 4 | * | 4 | * |
| 5 | * This file is part of the objects raid engine (ore). | 5 | * This file is part of the objects raid engine (ore). |
| 6 | * | 6 | * |
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h index cf6375d82129..a6e746775570 100644 --- a/fs/exofs/ore_raid.h +++ b/fs/exofs/ore_raid.h | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) from 2011 | 2 | * Copyright (C) from 2011 |
| 3 | * Boaz Harrosh <bharrosh@panasas.com> | 3 | * Boaz Harrosh <ooo@electrozaur.com> |
| 4 | * | 4 | * |
| 5 | * This file is part of the objects raid engine (ore). | 5 | * This file is part of the objects raid engine (ore). |
| 6 | * | 6 | * |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index ed73ed8ebbee..95965503afcb 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
| 8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c index 4dd687c3e747..832e2624b80b 100644 --- a/fs/exofs/symlink.c +++ b/fs/exofs/symlink.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | * Copyright (C) 2005, 2006 | 2 | * Copyright (C) 2005, 2006 |
| 3 | * Avishay Traeger (avishay@gmail.com) | 3 | * Avishay Traeger (avishay@gmail.com) |
| 4 | * Copyright (C) 2008, 2009 | 4 | * Copyright (C) 2008, 2009 |
| 5 | * Boaz Harrosh <bharrosh@panasas.com> | 5 | * Boaz Harrosh <ooo@electrozaur.com> |
| 6 | * | 6 | * |
| 7 | * Copyrights for code taken from ext2: | 7 | * Copyrights for code taken from ext2: |
| 8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index 1b4f2f95fc37..5e6a2c0a1f0b 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2012 | 2 | * Copyright (C) 2012 |
| 3 | * Sachin Bhamare <sbhamare@panasas.com> | 3 | * Sachin Bhamare <sbhamare@panasas.com> |
| 4 | * Boaz Harrosh <bharrosh@panasas.com> | 4 | * Boaz Harrosh <ooo@electrozaur.com> |
| 5 | * | 5 | * |
| 6 | * This file is part of exofs. | 6 | * This file is part of exofs. |
| 7 | * | 7 | * |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 7015db0bafd1..eb742d0e67ff 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -1354,13 +1354,6 @@ set_qf_format: | |||
| 1354 | "not specified."); | 1354 | "not specified."); |
| 1355 | return 0; | 1355 | return 0; |
| 1356 | } | 1356 | } |
| 1357 | } else { | ||
| 1358 | if (sbi->s_jquota_fmt) { | ||
| 1359 | ext3_msg(sb, KERN_ERR, "error: journaled quota format " | ||
| 1360 | "specified with no journaling " | ||
| 1361 | "enabled."); | ||
| 1362 | return 0; | ||
| 1363 | } | ||
| 1364 | } | 1357 | } |
| 1365 | #endif | 1358 | #endif |
| 1366 | return 1; | 1359 | return 1; |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 581ef40fbe90..83a6f497c4e0 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb, | |||
| 176 | } | 176 | } |
| 177 | 177 | ||
| 178 | /* Initializes an uninitialized block bitmap */ | 178 | /* Initializes an uninitialized block bitmap */ |
| 179 | static void ext4_init_block_bitmap(struct super_block *sb, | 179 | static int ext4_init_block_bitmap(struct super_block *sb, |
| 180 | struct buffer_head *bh, | 180 | struct buffer_head *bh, |
| 181 | ext4_group_t block_group, | 181 | ext4_group_t block_group, |
| 182 | struct ext4_group_desc *gdp) | 182 | struct ext4_group_desc *gdp) |
| @@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb, | |||
| 192 | /* If checksum is bad mark all blocks used to prevent allocation | 192 | /* If checksum is bad mark all blocks used to prevent allocation |
| 193 | * essentially implementing a per-group read-only flag. */ | 193 | * essentially implementing a per-group read-only flag. */ |
| 194 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 194 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
| 195 | ext4_error(sb, "Checksum bad for group %u", block_group); | ||
| 196 | grp = ext4_get_group_info(sb, block_group); | 195 | grp = ext4_get_group_info(sb, block_group); |
| 197 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | 196 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) |
| 198 | percpu_counter_sub(&sbi->s_freeclusters_counter, | 197 | percpu_counter_sub(&sbi->s_freeclusters_counter, |
| @@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb, | |||
| 205 | count); | 204 | count); |
| 206 | } | 205 | } |
| 207 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); | 206 | set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); |
| 208 | return; | 207 | return -EIO; |
| 209 | } | 208 | } |
| 210 | memset(bh->b_data, 0, sb->s_blocksize); | 209 | memset(bh->b_data, 0, sb->s_blocksize); |
| 211 | 210 | ||
| @@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb, | |||
| 243 | sb->s_blocksize * 8, bh->b_data); | 242 | sb->s_blocksize * 8, bh->b_data); |
| 244 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); | 243 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); |
| 245 | ext4_group_desc_csum_set(sb, block_group, gdp); | 244 | ext4_group_desc_csum_set(sb, block_group, gdp); |
| 245 | return 0; | ||
| 246 | } | 246 | } |
| 247 | 247 | ||
| 248 | /* Return the number of free blocks in a block group. It is used when | 248 | /* Return the number of free blocks in a block group. It is used when |
| @@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) | |||
| 438 | } | 438 | } |
| 439 | ext4_lock_group(sb, block_group); | 439 | ext4_lock_group(sb, block_group); |
| 440 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 440 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 441 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 441 | int err; |
| 442 | |||
| 443 | err = ext4_init_block_bitmap(sb, bh, block_group, desc); | ||
| 442 | set_bitmap_uptodate(bh); | 444 | set_bitmap_uptodate(bh); |
| 443 | set_buffer_uptodate(bh); | 445 | set_buffer_uptodate(bh); |
| 444 | ext4_unlock_group(sb, block_group); | 446 | ext4_unlock_group(sb, block_group); |
| 445 | unlock_buffer(bh); | 447 | unlock_buffer(bh); |
| 448 | if (err) | ||
| 449 | ext4_error(sb, "Checksum bad for grp %u", block_group); | ||
| 446 | return bh; | 450 | return bh; |
| 447 | } | 451 | } |
| 448 | ext4_unlock_group(sb, block_group); | 452 | ext4_unlock_group(sb, block_group); |
| @@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
| 636 | * Account for the allocated meta blocks. We will never | 640 | * Account for the allocated meta blocks. We will never |
| 637 | * fail EDQUOT for metdata, but we do account for it. | 641 | * fail EDQUOT for metdata, but we do account for it. |
| 638 | */ | 642 | */ |
| 639 | if (!(*errp) && | 643 | if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { |
| 640 | ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { | ||
| 641 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 644 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
| 642 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 645 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 643 | dquot_alloc_block_nofail(inode, | 646 | dquot_alloc_block_nofail(inode, |
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 3285aa5a706a..b610779a958c 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
| @@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | |||
| 24 | __u32 provided, calculated; | 24 | __u32 provided, calculated; |
| 25 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 25 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 26 | 26 | ||
| 27 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 27 | if (!ext4_has_metadata_csum(sb)) |
| 28 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 29 | return 1; | 28 | return 1; |
| 30 | 29 | ||
| 31 | provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); | 30 | provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); |
| @@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | |||
| 46 | __u32 csum; | 45 | __u32 csum; |
| 47 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 46 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 48 | 47 | ||
| 49 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 48 | if (!ext4_has_metadata_csum(sb)) |
| 50 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 51 | return; | 49 | return; |
| 52 | 50 | ||
| 53 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | 51 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); |
| @@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | |||
| 65 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 63 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 66 | int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; | 64 | int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; |
| 67 | 65 | ||
| 68 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 66 | if (!ext4_has_metadata_csum(sb)) |
| 69 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 70 | return 1; | 67 | return 1; |
| 71 | 68 | ||
| 72 | provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); | 69 | provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); |
| @@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | |||
| 91 | __u32 csum; | 88 | __u32 csum; |
| 92 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 89 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 93 | 90 | ||
| 94 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 91 | if (!ext4_has_metadata_csum(sb)) |
| 95 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 96 | return; | 92 | return; |
| 97 | 93 | ||
| 98 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | 94 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 0bb3f9ea0832..c24143ea9c08 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
| @@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
| 151 | &file->f_ra, file, | 151 | &file->f_ra, file, |
| 152 | index, 1); | 152 | index, 1); |
| 153 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 153 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
| 154 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); | 154 | bh = ext4_bread(NULL, inode, map.m_lblk, 0); |
| 155 | if (IS_ERR(bh)) | ||
| 156 | return PTR_ERR(bh); | ||
| 155 | } | 157 | } |
| 156 | 158 | ||
| 157 | /* | ||
| 158 | * We ignore I/O errors on directories so users have a chance | ||
| 159 | * of recovering data when there's a bad sector | ||
| 160 | */ | ||
| 161 | if (!bh) { | 159 | if (!bh) { |
| 162 | if (!dir_has_error) { | 160 | if (!dir_has_error) { |
| 163 | EXT4_ERROR_FILE(file, 0, | 161 | EXT4_ERROR_FILE(file, 0, |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b0c225cdb52c..c55a1faaed58 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -572,15 +572,15 @@ enum { | |||
| 572 | 572 | ||
| 573 | /* | 573 | /* |
| 574 | * The bit position of these flags must not overlap with any of the | 574 | * The bit position of these flags must not overlap with any of the |
| 575 | * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(), | 575 | * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(), |
| 576 | * read_extent_tree_block(), ext4_split_extent_at(), | 576 | * read_extent_tree_block(), ext4_split_extent_at(), |
| 577 | * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). | 577 | * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). |
| 578 | * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be | 578 | * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be |
| 579 | * caching the extents when reading from the extent tree while a | 579 | * caching the extents when reading from the extent tree while a |
| 580 | * truncate or punch hole operation is in progress. | 580 | * truncate or punch hole operation is in progress. |
| 581 | */ | 581 | */ |
| 582 | #define EXT4_EX_NOCACHE 0x0400 | 582 | #define EXT4_EX_NOCACHE 0x40000000 |
| 583 | #define EXT4_EX_FORCE_CACHE 0x0800 | 583 | #define EXT4_EX_FORCE_CACHE 0x20000000 |
| 584 | 584 | ||
| 585 | /* | 585 | /* |
| 586 | * Flags used by ext4_free_blocks | 586 | * Flags used by ext4_free_blocks |
| @@ -890,6 +890,7 @@ struct ext4_inode_info { | |||
| 890 | struct ext4_es_tree i_es_tree; | 890 | struct ext4_es_tree i_es_tree; |
| 891 | rwlock_t i_es_lock; | 891 | rwlock_t i_es_lock; |
| 892 | struct list_head i_es_lru; | 892 | struct list_head i_es_lru; |
| 893 | unsigned int i_es_all_nr; /* protected by i_es_lock */ | ||
| 893 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 894 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ |
| 894 | unsigned long i_touch_when; /* jiffies of last accessing */ | 895 | unsigned long i_touch_when; /* jiffies of last accessing */ |
| 895 | 896 | ||
| @@ -1174,6 +1175,9 @@ struct ext4_super_block { | |||
| 1174 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 | 1175 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 |
| 1175 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ | 1176 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ |
| 1176 | 1177 | ||
| 1178 | /* Number of quota types we support */ | ||
| 1179 | #define EXT4_MAXQUOTAS 2 | ||
| 1180 | |||
| 1177 | /* | 1181 | /* |
| 1178 | * fourth extended-fs super-block data in memory | 1182 | * fourth extended-fs super-block data in memory |
| 1179 | */ | 1183 | */ |
| @@ -1237,7 +1241,7 @@ struct ext4_sb_info { | |||
| 1237 | u32 s_min_batch_time; | 1241 | u32 s_min_batch_time; |
| 1238 | struct block_device *journal_bdev; | 1242 | struct block_device *journal_bdev; |
| 1239 | #ifdef CONFIG_QUOTA | 1243 | #ifdef CONFIG_QUOTA |
| 1240 | char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ | 1244 | char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ |
| 1241 | int s_jquota_fmt; /* Format of quota to use */ | 1245 | int s_jquota_fmt; /* Format of quota to use */ |
| 1242 | #endif | 1246 | #endif |
| 1243 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | 1247 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ |
| @@ -1330,8 +1334,7 @@ struct ext4_sb_info { | |||
| 1330 | /* Reclaim extents from extent status tree */ | 1334 | /* Reclaim extents from extent status tree */ |
| 1331 | struct shrinker s_es_shrinker; | 1335 | struct shrinker s_es_shrinker; |
| 1332 | struct list_head s_es_lru; | 1336 | struct list_head s_es_lru; |
| 1333 | unsigned long s_es_last_sorted; | 1337 | struct ext4_es_stats s_es_stats; |
| 1334 | struct percpu_counter s_extent_cache_cnt; | ||
| 1335 | struct mb_cache *s_mb_cache; | 1338 | struct mb_cache *s_mb_cache; |
| 1336 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1339 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; |
| 1337 | 1340 | ||
| @@ -1399,7 +1402,6 @@ enum { | |||
| 1399 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ | 1402 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ |
| 1400 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | 1403 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
| 1401 | EXT4_STATE_NEWENTRY, /* File just added to dir */ | 1404 | EXT4_STATE_NEWENTRY, /* File just added to dir */ |
| 1402 | EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ | ||
| 1403 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read | 1405 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read |
| 1404 | nolocking */ | 1406 | nolocking */ |
| 1405 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ | 1407 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ |
| @@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
| 2086 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); | 2088 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
| 2087 | 2089 | ||
| 2088 | /* inode.c */ | 2090 | /* inode.c */ |
| 2089 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 2091 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); |
| 2090 | ext4_lblk_t, int, int *); | 2092 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); |
| 2091 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | ||
| 2092 | ext4_lblk_t, int, int *); | ||
| 2093 | int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2093 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
| 2094 | struct buffer_head *bh_result, int create); | 2094 | struct buffer_head *bh_result, int create); |
| 2095 | int ext4_get_block(struct inode *inode, sector_t iblock, | 2095 | int ext4_get_block(struct inode *inode, sector_t iblock, |
| @@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle, | |||
| 2109 | #define CONVERT_INLINE_DATA 2 | 2109 | #define CONVERT_INLINE_DATA 2 |
| 2110 | 2110 | ||
| 2111 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 2111 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
| 2112 | extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); | ||
| 2112 | extern int ext4_write_inode(struct inode *, struct writeback_control *); | 2113 | extern int ext4_write_inode(struct inode *, struct writeback_control *); |
| 2113 | extern int ext4_setattr(struct dentry *, struct iattr *); | 2114 | extern int ext4_setattr(struct dentry *, struct iattr *); |
| 2114 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 2115 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
| @@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb, | |||
| 2332 | static inline int ext4_has_group_desc_csum(struct super_block *sb) | 2333 | static inline int ext4_has_group_desc_csum(struct super_block *sb) |
| 2333 | { | 2334 | { |
| 2334 | return EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2335 | return EXT4_HAS_RO_COMPAT_FEATURE(sb, |
| 2335 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM | | 2336 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || |
| 2336 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); | 2337 | (EXT4_SB(sb)->s_chksum_driver != NULL); |
| 2337 | } | 2338 | } |
| 2338 | 2339 | ||
| 2340 | static inline int ext4_has_metadata_csum(struct super_block *sb) | ||
| 2341 | { | ||
| 2342 | WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
| 2343 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
| 2344 | !EXT4_SB(sb)->s_chksum_driver); | ||
| 2345 | |||
| 2346 | return (EXT4_SB(sb)->s_chksum_driver != NULL); | ||
| 2347 | } | ||
| 2339 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) | 2348 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) |
| 2340 | { | 2349 | { |
| 2341 | return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | | 2350 | return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | |
| @@ -2731,21 +2740,26 @@ extern int ext4_can_extents_be_merged(struct inode *inode, | |||
| 2731 | struct ext4_extent *ex1, | 2740 | struct ext4_extent *ex1, |
| 2732 | struct ext4_extent *ex2); | 2741 | struct ext4_extent *ex2); |
| 2733 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, | 2742 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, |
| 2734 | struct ext4_ext_path *, | 2743 | struct ext4_ext_path **, |
| 2735 | struct ext4_extent *, int); | 2744 | struct ext4_extent *, int); |
| 2736 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 2745 | extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, |
| 2737 | struct ext4_ext_path *, | 2746 | struct ext4_ext_path **, |
| 2738 | int flags); | 2747 | int flags); |
| 2739 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 2748 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
| 2740 | extern int ext4_ext_check_inode(struct inode *inode); | 2749 | extern int ext4_ext_check_inode(struct inode *inode); |
| 2741 | extern int ext4_find_delalloc_range(struct inode *inode, | 2750 | extern int ext4_find_delalloc_range(struct inode *inode, |
| 2742 | ext4_lblk_t lblk_start, | 2751 | ext4_lblk_t lblk_start, |
| 2743 | ext4_lblk_t lblk_end); | 2752 | ext4_lblk_t lblk_end); |
| 2744 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); | 2753 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); |
| 2754 | extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); | ||
| 2745 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2755 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 2746 | __u64 start, __u64 len); | 2756 | __u64 start, __u64 len); |
| 2747 | extern int ext4_ext_precache(struct inode *inode); | 2757 | extern int ext4_ext_precache(struct inode *inode); |
| 2748 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); | 2758 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); |
| 2759 | extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, | ||
| 2760 | struct inode *inode2, ext4_lblk_t lblk1, | ||
| 2761 | ext4_lblk_t lblk2, ext4_lblk_t count, | ||
| 2762 | int mark_unwritten,int *err); | ||
| 2749 | 2763 | ||
| 2750 | /* move_extent.c */ | 2764 | /* move_extent.c */ |
| 2751 | extern void ext4_double_down_write_data_sem(struct inode *first, | 2765 | extern void ext4_double_down_write_data_sem(struct inode *first, |
| @@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode, | |||
| 2755 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | 2769 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, |
| 2756 | __u64 start_orig, __u64 start_donor, | 2770 | __u64 start_orig, __u64 start_donor, |
| 2757 | __u64 len, __u64 *moved_len); | 2771 | __u64 len, __u64 *moved_len); |
| 2758 | extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
| 2759 | struct ext4_extent **extent); | ||
| 2760 | 2772 | ||
| 2761 | /* page-io.c */ | 2773 | /* page-io.c */ |
| 2762 | extern int __init ext4_init_pageio(void); | 2774 | extern int __init ext4_init_pageio(void); |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index a867f5ca9991..3c9381547094 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
| @@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh) | |||
| 123 | struct ext4_ext_path { | 123 | struct ext4_ext_path { |
| 124 | ext4_fsblk_t p_block; | 124 | ext4_fsblk_t p_block; |
| 125 | __u16 p_depth; | 125 | __u16 p_depth; |
| 126 | __u16 p_maxdepth; | ||
| 126 | struct ext4_extent *p_ext; | 127 | struct ext4_extent *p_ext; |
| 127 | struct ext4_extent_idx *p_idx; | 128 | struct ext4_extent_idx *p_idx; |
| 128 | struct ext4_extent_header *p_hdr; | 129 | struct ext4_extent_header *p_hdr; |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0074e0d23d6e..3445035c7e01 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
| @@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
| 256 | set_buffer_prio(bh); | 256 | set_buffer_prio(bh); |
| 257 | if (ext4_handle_valid(handle)) { | 257 | if (ext4_handle_valid(handle)) { |
| 258 | err = jbd2_journal_dirty_metadata(handle, bh); | 258 | err = jbd2_journal_dirty_metadata(handle, bh); |
| 259 | /* Errors can only happen if there is a bug */ | 259 | /* Errors can only happen due to aborted journal or a nasty bug */ |
| 260 | if (WARN_ON_ONCE(err)) { | 260 | if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) { |
| 261 | ext4_journal_abort_handle(where, line, __func__, bh, | 261 | ext4_journal_abort_handle(where, line, __func__, bh, |
| 262 | handle, err); | 262 | handle, err); |
| 263 | if (inode == NULL) { | 263 | if (inode == NULL) { |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 17c00ff202f2..9c5b49fb281e 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
| @@ -102,9 +102,9 @@ | |||
| 102 | #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 | 102 | #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 |
| 103 | #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 | 103 | #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 |
| 104 | #endif | 104 | #endif |
| 105 | #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 105 | #define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) |
| 106 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) | 106 | #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) |
| 107 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) | 107 | #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) |
| 108 | 108 | ||
| 109 | static inline int ext4_jbd2_credits_xattr(struct inode *inode) | 109 | static inline int ext4_jbd2_credits_xattr(struct inode *inode) |
| 110 | { | 110 | { |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74292a71b384..0b16fb4c06d3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode, | |||
| 73 | { | 73 | { |
| 74 | struct ext4_extent_tail *et; | 74 | struct ext4_extent_tail *et; |
| 75 | 75 | ||
| 76 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 76 | if (!ext4_has_metadata_csum(inode->i_sb)) |
| 77 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 78 | return 1; | 77 | return 1; |
| 79 | 78 | ||
| 80 | et = find_ext4_extent_tail(eh); | 79 | et = find_ext4_extent_tail(eh); |
| @@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode, | |||
| 88 | { | 87 | { |
| 89 | struct ext4_extent_tail *et; | 88 | struct ext4_extent_tail *et; |
| 90 | 89 | ||
| 91 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 90 | if (!ext4_has_metadata_csum(inode->i_sb)) |
| 92 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 93 | return; | 91 | return; |
| 94 | 92 | ||
| 95 | et = find_ext4_extent_tail(eh); | 93 | et = find_ext4_extent_tail(eh); |
| @@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode, | |||
| 98 | 96 | ||
| 99 | static int ext4_split_extent(handle_t *handle, | 97 | static int ext4_split_extent(handle_t *handle, |
| 100 | struct inode *inode, | 98 | struct inode *inode, |
| 101 | struct ext4_ext_path *path, | 99 | struct ext4_ext_path **ppath, |
| 102 | struct ext4_map_blocks *map, | 100 | struct ext4_map_blocks *map, |
| 103 | int split_flag, | 101 | int split_flag, |
| 104 | int flags); | 102 | int flags); |
| 105 | 103 | ||
| 106 | static int ext4_split_extent_at(handle_t *handle, | 104 | static int ext4_split_extent_at(handle_t *handle, |
| 107 | struct inode *inode, | 105 | struct inode *inode, |
| 108 | struct ext4_ext_path *path, | 106 | struct ext4_ext_path **ppath, |
| 109 | ext4_lblk_t split, | 107 | ext4_lblk_t split, |
| 110 | int split_flag, | 108 | int split_flag, |
| 111 | int flags); | 109 | int flags); |
| @@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) | |||
| 291 | return size; | 289 | return size; |
| 292 | } | 290 | } |
| 293 | 291 | ||
| 292 | static inline int | ||
| 293 | ext4_force_split_extent_at(handle_t *handle, struct inode *inode, | ||
| 294 | struct ext4_ext_path **ppath, ext4_lblk_t lblk, | ||
| 295 | int nofail) | ||
| 296 | { | ||
| 297 | struct ext4_ext_path *path = *ppath; | ||
| 298 | int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); | ||
| 299 | |||
| 300 | return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? | ||
| 301 | EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, | ||
| 302 | EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | | ||
| 303 | (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); | ||
| 304 | } | ||
| 305 | |||
| 294 | /* | 306 | /* |
| 295 | * Calculate the number of metadata blocks needed | 307 | * Calculate the number of metadata blocks needed |
| 296 | * to allocate @blocks | 308 | * to allocate @blocks |
| @@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, | |||
| 695 | 707 | ||
| 696 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 708 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
| 697 | { | 709 | { |
| 698 | int depth = path->p_depth; | 710 | int depth, i; |
| 699 | int i; | ||
| 700 | 711 | ||
| 712 | if (!path) | ||
| 713 | return; | ||
| 714 | depth = path->p_depth; | ||
| 701 | for (i = 0; i <= depth; i++, path++) | 715 | for (i = 0; i <= depth; i++, path++) |
| 702 | if (path->p_bh) { | 716 | if (path->p_bh) { |
| 703 | brelse(path->p_bh); | 717 | brelse(path->p_bh); |
| @@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
| 841 | } | 855 | } |
| 842 | 856 | ||
| 843 | struct ext4_ext_path * | 857 | struct ext4_ext_path * |
| 844 | ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | 858 | ext4_find_extent(struct inode *inode, ext4_lblk_t block, |
| 845 | struct ext4_ext_path *path, int flags) | 859 | struct ext4_ext_path **orig_path, int flags) |
| 846 | { | 860 | { |
| 847 | struct ext4_extent_header *eh; | 861 | struct ext4_extent_header *eh; |
| 848 | struct buffer_head *bh; | 862 | struct buffer_head *bh; |
| 849 | short int depth, i, ppos = 0, alloc = 0; | 863 | struct ext4_ext_path *path = orig_path ? *orig_path : NULL; |
| 864 | short int depth, i, ppos = 0; | ||
| 850 | int ret; | 865 | int ret; |
| 851 | 866 | ||
| 852 | eh = ext_inode_hdr(inode); | 867 | eh = ext_inode_hdr(inode); |
| 853 | depth = ext_depth(inode); | 868 | depth = ext_depth(inode); |
| 854 | 869 | ||
| 855 | /* account possible depth increase */ | 870 | if (path) { |
| 871 | ext4_ext_drop_refs(path); | ||
| 872 | if (depth > path[0].p_maxdepth) { | ||
| 873 | kfree(path); | ||
| 874 | *orig_path = path = NULL; | ||
| 875 | } | ||
| 876 | } | ||
| 856 | if (!path) { | 877 | if (!path) { |
| 878 | /* account possible depth increase */ | ||
| 857 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), | 879 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), |
| 858 | GFP_NOFS); | 880 | GFP_NOFS); |
| 859 | if (!path) | 881 | if (unlikely(!path)) |
| 860 | return ERR_PTR(-ENOMEM); | 882 | return ERR_PTR(-ENOMEM); |
| 861 | alloc = 1; | 883 | path[0].p_maxdepth = depth + 1; |
| 862 | } | 884 | } |
| 863 | path[0].p_hdr = eh; | 885 | path[0].p_hdr = eh; |
| 864 | path[0].p_bh = NULL; | 886 | path[0].p_bh = NULL; |
| @@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
| 876 | 898 | ||
| 877 | bh = read_extent_tree_block(inode, path[ppos].p_block, --i, | 899 | bh = read_extent_tree_block(inode, path[ppos].p_block, --i, |
| 878 | flags); | 900 | flags); |
| 879 | if (IS_ERR(bh)) { | 901 | if (unlikely(IS_ERR(bh))) { |
| 880 | ret = PTR_ERR(bh); | 902 | ret = PTR_ERR(bh); |
| 881 | goto err; | 903 | goto err; |
| 882 | } | 904 | } |
| @@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
| 910 | 932 | ||
| 911 | err: | 933 | err: |
| 912 | ext4_ext_drop_refs(path); | 934 | ext4_ext_drop_refs(path); |
| 913 | if (alloc) | 935 | kfree(path); |
| 914 | kfree(path); | 936 | if (orig_path) |
| 937 | *orig_path = NULL; | ||
| 915 | return ERR_PTR(ret); | 938 | return ERR_PTR(ret); |
| 916 | } | 939 | } |
| 917 | 940 | ||
| @@ -1238,16 +1261,24 @@ cleanup: | |||
| 1238 | * just created block | 1261 | * just created block |
| 1239 | */ | 1262 | */ |
| 1240 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | 1263 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, |
| 1241 | unsigned int flags, | 1264 | unsigned int flags) |
| 1242 | struct ext4_extent *newext) | ||
| 1243 | { | 1265 | { |
| 1244 | struct ext4_extent_header *neh; | 1266 | struct ext4_extent_header *neh; |
| 1245 | struct buffer_head *bh; | 1267 | struct buffer_head *bh; |
| 1246 | ext4_fsblk_t newblock; | 1268 | ext4_fsblk_t newblock, goal = 0; |
| 1269 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
| 1247 | int err = 0; | 1270 | int err = 0; |
| 1248 | 1271 | ||
| 1249 | newblock = ext4_ext_new_meta_block(handle, inode, NULL, | 1272 | /* Try to prepend new index to old one */ |
| 1250 | newext, &err, flags); | 1273 | if (ext_depth(inode)) |
| 1274 | goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); | ||
| 1275 | if (goal > le32_to_cpu(es->s_first_data_block)) { | ||
| 1276 | flags |= EXT4_MB_HINT_TRY_GOAL; | ||
| 1277 | goal--; | ||
| 1278 | } else | ||
| 1279 | goal = ext4_inode_to_goal_block(inode); | ||
| 1280 | newblock = ext4_new_meta_blocks(handle, inode, goal, flags, | ||
| 1281 | NULL, &err); | ||
| 1251 | if (newblock == 0) | 1282 | if (newblock == 0) |
| 1252 | return err; | 1283 | return err; |
| 1253 | 1284 | ||
| @@ -1314,9 +1345,10 @@ out: | |||
| 1314 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, | 1345 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, |
| 1315 | unsigned int mb_flags, | 1346 | unsigned int mb_flags, |
| 1316 | unsigned int gb_flags, | 1347 | unsigned int gb_flags, |
| 1317 | struct ext4_ext_path *path, | 1348 | struct ext4_ext_path **ppath, |
| 1318 | struct ext4_extent *newext) | 1349 | struct ext4_extent *newext) |
| 1319 | { | 1350 | { |
| 1351 | struct ext4_ext_path *path = *ppath; | ||
| 1320 | struct ext4_ext_path *curp; | 1352 | struct ext4_ext_path *curp; |
| 1321 | int depth, i, err = 0; | 1353 | int depth, i, err = 0; |
| 1322 | 1354 | ||
| @@ -1340,23 +1372,21 @@ repeat: | |||
| 1340 | goto out; | 1372 | goto out; |
| 1341 | 1373 | ||
| 1342 | /* refill path */ | 1374 | /* refill path */ |
| 1343 | ext4_ext_drop_refs(path); | 1375 | path = ext4_find_extent(inode, |
| 1344 | path = ext4_ext_find_extent(inode, | ||
| 1345 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), | 1376 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
| 1346 | path, gb_flags); | 1377 | ppath, gb_flags); |
| 1347 | if (IS_ERR(path)) | 1378 | if (IS_ERR(path)) |
| 1348 | err = PTR_ERR(path); | 1379 | err = PTR_ERR(path); |
| 1349 | } else { | 1380 | } else { |
| 1350 | /* tree is full, time to grow in depth */ | 1381 | /* tree is full, time to grow in depth */ |
| 1351 | err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext); | 1382 | err = ext4_ext_grow_indepth(handle, inode, mb_flags); |
| 1352 | if (err) | 1383 | if (err) |
| 1353 | goto out; | 1384 | goto out; |
| 1354 | 1385 | ||
| 1355 | /* refill path */ | 1386 | /* refill path */ |
| 1356 | ext4_ext_drop_refs(path); | 1387 | path = ext4_find_extent(inode, |
| 1357 | path = ext4_ext_find_extent(inode, | ||
| 1358 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), | 1388 | (ext4_lblk_t)le32_to_cpu(newext->ee_block), |
| 1359 | path, gb_flags); | 1389 | ppath, gb_flags); |
| 1360 | if (IS_ERR(path)) { | 1390 | if (IS_ERR(path)) { |
| 1361 | err = PTR_ERR(path); | 1391 | err = PTR_ERR(path); |
| 1362 | goto out; | 1392 | goto out; |
| @@ -1559,7 +1589,7 @@ found_extent: | |||
| 1559 | * allocated block. Thus, index entries have to be consistent | 1589 | * allocated block. Thus, index entries have to be consistent |
| 1560 | * with leaves. | 1590 | * with leaves. |
| 1561 | */ | 1591 | */ |
| 1562 | static ext4_lblk_t | 1592 | ext4_lblk_t |
| 1563 | ext4_ext_next_allocated_block(struct ext4_ext_path *path) | 1593 | ext4_ext_next_allocated_block(struct ext4_ext_path *path) |
| 1564 | { | 1594 | { |
| 1565 | int depth; | 1595 | int depth; |
| @@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, | |||
| 1802 | sizeof(struct ext4_extent_idx); | 1832 | sizeof(struct ext4_extent_idx); |
| 1803 | s += sizeof(struct ext4_extent_header); | 1833 | s += sizeof(struct ext4_extent_header); |
| 1804 | 1834 | ||
| 1835 | path[1].p_maxdepth = path[0].p_maxdepth; | ||
| 1805 | memcpy(path[0].p_hdr, path[1].p_hdr, s); | 1836 | memcpy(path[0].p_hdr, path[1].p_hdr, s); |
| 1806 | path[0].p_depth = 0; | 1837 | path[0].p_depth = 0; |
| 1807 | path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + | 1838 | path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + |
| @@ -1896,9 +1927,10 @@ out: | |||
| 1896 | * creating new leaf in the no-space case. | 1927 | * creating new leaf in the no-space case. |
| 1897 | */ | 1928 | */ |
| 1898 | int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | 1929 | int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, |
| 1899 | struct ext4_ext_path *path, | 1930 | struct ext4_ext_path **ppath, |
| 1900 | struct ext4_extent *newext, int gb_flags) | 1931 | struct ext4_extent *newext, int gb_flags) |
| 1901 | { | 1932 | { |
| 1933 | struct ext4_ext_path *path = *ppath; | ||
| 1902 | struct ext4_extent_header *eh; | 1934 | struct ext4_extent_header *eh; |
| 1903 | struct ext4_extent *ex, *fex; | 1935 | struct ext4_extent *ex, *fex; |
| 1904 | struct ext4_extent *nearex; /* nearest extent */ | 1936 | struct ext4_extent *nearex; /* nearest extent */ |
| @@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1907 | ext4_lblk_t next; | 1939 | ext4_lblk_t next; |
| 1908 | int mb_flags = 0, unwritten; | 1940 | int mb_flags = 0, unwritten; |
| 1909 | 1941 | ||
| 1942 | if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
| 1943 | mb_flags |= EXT4_MB_DELALLOC_RESERVED; | ||
| 1910 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1944 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
| 1911 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1945 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
| 1912 | return -EIO; | 1946 | return -EIO; |
| @@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1925 | /* | 1959 | /* |
| 1926 | * Try to see whether we should rather test the extent on | 1960 | * Try to see whether we should rather test the extent on |
| 1927 | * right from ex, or from the left of ex. This is because | 1961 | * right from ex, or from the left of ex. This is because |
| 1928 | * ext4_ext_find_extent() can return either extent on the | 1962 | * ext4_find_extent() can return either extent on the |
| 1929 | * left, or on the right from the searched position. This | 1963 | * left, or on the right from the searched position. This |
| 1930 | * will make merging more effective. | 1964 | * will make merging more effective. |
| 1931 | */ | 1965 | */ |
| @@ -2008,7 +2042,7 @@ prepend: | |||
| 2008 | if (next != EXT_MAX_BLOCKS) { | 2042 | if (next != EXT_MAX_BLOCKS) { |
| 2009 | ext_debug("next leaf block - %u\n", next); | 2043 | ext_debug("next leaf block - %u\n", next); |
| 2010 | BUG_ON(npath != NULL); | 2044 | BUG_ON(npath != NULL); |
| 2011 | npath = ext4_ext_find_extent(inode, next, NULL, 0); | 2045 | npath = ext4_find_extent(inode, next, NULL, 0); |
| 2012 | if (IS_ERR(npath)) | 2046 | if (IS_ERR(npath)) |
| 2013 | return PTR_ERR(npath); | 2047 | return PTR_ERR(npath); |
| 2014 | BUG_ON(npath->p_depth != path->p_depth); | 2048 | BUG_ON(npath->p_depth != path->p_depth); |
| @@ -2028,9 +2062,9 @@ prepend: | |||
| 2028 | * We're gonna add a new leaf in the tree. | 2062 | * We're gonna add a new leaf in the tree. |
| 2029 | */ | 2063 | */ |
| 2030 | if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) | 2064 | if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) |
| 2031 | mb_flags = EXT4_MB_USE_RESERVED; | 2065 | mb_flags |= EXT4_MB_USE_RESERVED; |
| 2032 | err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, | 2066 | err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, |
| 2033 | path, newext); | 2067 | ppath, newext); |
| 2034 | if (err) | 2068 | if (err) |
| 2035 | goto cleanup; | 2069 | goto cleanup; |
| 2036 | depth = ext_depth(inode); | 2070 | depth = ext_depth(inode); |
| @@ -2108,10 +2142,8 @@ merge: | |||
| 2108 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | 2142 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
| 2109 | 2143 | ||
| 2110 | cleanup: | 2144 | cleanup: |
| 2111 | if (npath) { | 2145 | ext4_ext_drop_refs(npath); |
| 2112 | ext4_ext_drop_refs(npath); | 2146 | kfree(npath); |
| 2113 | kfree(npath); | ||
| 2114 | } | ||
| 2115 | return err; | 2147 | return err; |
| 2116 | } | 2148 | } |
| 2117 | 2149 | ||
| @@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
| 2133 | /* find extent for this block */ | 2165 | /* find extent for this block */ |
| 2134 | down_read(&EXT4_I(inode)->i_data_sem); | 2166 | down_read(&EXT4_I(inode)->i_data_sem); |
| 2135 | 2167 | ||
| 2136 | if (path && ext_depth(inode) != depth) { | 2168 | path = ext4_find_extent(inode, block, &path, 0); |
| 2137 | /* depth was changed. we have to realloc path */ | ||
| 2138 | kfree(path); | ||
| 2139 | path = NULL; | ||
| 2140 | } | ||
| 2141 | |||
| 2142 | path = ext4_ext_find_extent(inode, block, path, 0); | ||
| 2143 | if (IS_ERR(path)) { | 2169 | if (IS_ERR(path)) { |
| 2144 | up_read(&EXT4_I(inode)->i_data_sem); | 2170 | up_read(&EXT4_I(inode)->i_data_sem); |
| 2145 | err = PTR_ERR(path); | 2171 | err = PTR_ERR(path); |
| @@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
| 2156 | } | 2182 | } |
| 2157 | ex = path[depth].p_ext; | 2183 | ex = path[depth].p_ext; |
| 2158 | next = ext4_ext_next_allocated_block(path); | 2184 | next = ext4_ext_next_allocated_block(path); |
| 2159 | ext4_ext_drop_refs(path); | ||
| 2160 | 2185 | ||
| 2161 | flags = 0; | 2186 | flags = 0; |
| 2162 | exists = 0; | 2187 | exists = 0; |
| @@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
| 2266 | block = es.es_lblk + es.es_len; | 2291 | block = es.es_lblk + es.es_len; |
| 2267 | } | 2292 | } |
| 2268 | 2293 | ||
| 2269 | if (path) { | 2294 | ext4_ext_drop_refs(path); |
| 2270 | ext4_ext_drop_refs(path); | 2295 | kfree(path); |
| 2271 | kfree(path); | ||
| 2272 | } | ||
| 2273 | |||
| 2274 | return err; | 2296 | return err; |
| 2275 | } | 2297 | } |
| 2276 | 2298 | ||
| @@ -2826,7 +2848,7 @@ again: | |||
| 2826 | ext4_lblk_t ee_block; | 2848 | ext4_lblk_t ee_block; |
| 2827 | 2849 | ||
| 2828 | /* find extent for this block */ | 2850 | /* find extent for this block */ |
| 2829 | path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); | 2851 | path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); |
| 2830 | if (IS_ERR(path)) { | 2852 | if (IS_ERR(path)) { |
| 2831 | ext4_journal_stop(handle); | 2853 | ext4_journal_stop(handle); |
| 2832 | return PTR_ERR(path); | 2854 | return PTR_ERR(path); |
| @@ -2854,24 +2876,14 @@ again: | |||
| 2854 | */ | 2876 | */ |
| 2855 | if (end >= ee_block && | 2877 | if (end >= ee_block && |
| 2856 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { | 2878 | end < ee_block + ext4_ext_get_actual_len(ex) - 1) { |
| 2857 | int split_flag = 0; | ||
| 2858 | |||
| 2859 | if (ext4_ext_is_unwritten(ex)) | ||
| 2860 | split_flag = EXT4_EXT_MARK_UNWRIT1 | | ||
| 2861 | EXT4_EXT_MARK_UNWRIT2; | ||
| 2862 | |||
| 2863 | /* | 2879 | /* |
| 2864 | * Split the extent in two so that 'end' is the last | 2880 | * Split the extent in two so that 'end' is the last |
| 2865 | * block in the first new extent. Also we should not | 2881 | * block in the first new extent. Also we should not |
| 2866 | * fail removing space due to ENOSPC so try to use | 2882 | * fail removing space due to ENOSPC so try to use |
| 2867 | * reserved block if that happens. | 2883 | * reserved block if that happens. |
| 2868 | */ | 2884 | */ |
| 2869 | err = ext4_split_extent_at(handle, inode, path, | 2885 | err = ext4_force_split_extent_at(handle, inode, &path, |
| 2870 | end + 1, split_flag, | 2886 | end + 1, 1); |
| 2871 | EXT4_EX_NOCACHE | | ||
| 2872 | EXT4_GET_BLOCKS_PRE_IO | | ||
| 2873 | EXT4_GET_BLOCKS_METADATA_NOFAIL); | ||
| 2874 | |||
| 2875 | if (err < 0) | 2887 | if (err < 0) |
| 2876 | goto out; | 2888 | goto out; |
| 2877 | } | 2889 | } |
| @@ -2893,7 +2905,7 @@ again: | |||
| 2893 | ext4_journal_stop(handle); | 2905 | ext4_journal_stop(handle); |
| 2894 | return -ENOMEM; | 2906 | return -ENOMEM; |
| 2895 | } | 2907 | } |
| 2896 | path[0].p_depth = depth; | 2908 | path[0].p_maxdepth = path[0].p_depth = depth; |
| 2897 | path[0].p_hdr = ext_inode_hdr(inode); | 2909 | path[0].p_hdr = ext_inode_hdr(inode); |
| 2898 | i = 0; | 2910 | i = 0; |
| 2899 | 2911 | ||
| @@ -3013,10 +3025,9 @@ again: | |||
| 3013 | out: | 3025 | out: |
| 3014 | ext4_ext_drop_refs(path); | 3026 | ext4_ext_drop_refs(path); |
| 3015 | kfree(path); | 3027 | kfree(path); |
| 3016 | if (err == -EAGAIN) { | 3028 | path = NULL; |
| 3017 | path = NULL; | 3029 | if (err == -EAGAIN) |
| 3018 | goto again; | 3030 | goto again; |
| 3019 | } | ||
| 3020 | ext4_journal_stop(handle); | 3031 | ext4_journal_stop(handle); |
| 3021 | 3032 | ||
| 3022 | return err; | 3033 | return err; |
| @@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
| 3130 | */ | 3141 | */ |
| 3131 | static int ext4_split_extent_at(handle_t *handle, | 3142 | static int ext4_split_extent_at(handle_t *handle, |
| 3132 | struct inode *inode, | 3143 | struct inode *inode, |
| 3133 | struct ext4_ext_path *path, | 3144 | struct ext4_ext_path **ppath, |
| 3134 | ext4_lblk_t split, | 3145 | ext4_lblk_t split, |
| 3135 | int split_flag, | 3146 | int split_flag, |
| 3136 | int flags) | 3147 | int flags) |
| 3137 | { | 3148 | { |
| 3149 | struct ext4_ext_path *path = *ppath; | ||
| 3138 | ext4_fsblk_t newblock; | 3150 | ext4_fsblk_t newblock; |
| 3139 | ext4_lblk_t ee_block; | 3151 | ext4_lblk_t ee_block; |
| 3140 | struct ext4_extent *ex, newex, orig_ex, zero_ex; | 3152 | struct ext4_extent *ex, newex, orig_ex, zero_ex; |
| @@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle, | |||
| 3205 | if (split_flag & EXT4_EXT_MARK_UNWRIT2) | 3217 | if (split_flag & EXT4_EXT_MARK_UNWRIT2) |
| 3206 | ext4_ext_mark_unwritten(ex2); | 3218 | ext4_ext_mark_unwritten(ex2); |
| 3207 | 3219 | ||
| 3208 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3220 | err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); |
| 3209 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 3221 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { |
| 3210 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { | 3222 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { |
| 3211 | if (split_flag & EXT4_EXT_DATA_VALID1) { | 3223 | if (split_flag & EXT4_EXT_DATA_VALID1) { |
| @@ -3271,11 +3283,12 @@ fix_extent_len: | |||
| 3271 | */ | 3283 | */ |
| 3272 | static int ext4_split_extent(handle_t *handle, | 3284 | static int ext4_split_extent(handle_t *handle, |
| 3273 | struct inode *inode, | 3285 | struct inode *inode, |
| 3274 | struct ext4_ext_path *path, | 3286 | struct ext4_ext_path **ppath, |
| 3275 | struct ext4_map_blocks *map, | 3287 | struct ext4_map_blocks *map, |
| 3276 | int split_flag, | 3288 | int split_flag, |
| 3277 | int flags) | 3289 | int flags) |
| 3278 | { | 3290 | { |
| 3291 | struct ext4_ext_path *path = *ppath; | ||
| 3279 | ext4_lblk_t ee_block; | 3292 | ext4_lblk_t ee_block; |
| 3280 | struct ext4_extent *ex; | 3293 | struct ext4_extent *ex; |
| 3281 | unsigned int ee_len, depth; | 3294 | unsigned int ee_len, depth; |
| @@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle, | |||
| 3298 | EXT4_EXT_MARK_UNWRIT2; | 3311 | EXT4_EXT_MARK_UNWRIT2; |
| 3299 | if (split_flag & EXT4_EXT_DATA_VALID2) | 3312 | if (split_flag & EXT4_EXT_DATA_VALID2) |
| 3300 | split_flag1 |= EXT4_EXT_DATA_VALID1; | 3313 | split_flag1 |= EXT4_EXT_DATA_VALID1; |
| 3301 | err = ext4_split_extent_at(handle, inode, path, | 3314 | err = ext4_split_extent_at(handle, inode, ppath, |
| 3302 | map->m_lblk + map->m_len, split_flag1, flags1); | 3315 | map->m_lblk + map->m_len, split_flag1, flags1); |
| 3303 | if (err) | 3316 | if (err) |
| 3304 | goto out; | 3317 | goto out; |
| @@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle, | |||
| 3309 | * Update path is required because previous ext4_split_extent_at() may | 3322 | * Update path is required because previous ext4_split_extent_at() may |
| 3310 | * result in split of original leaf or extent zeroout. | 3323 | * result in split of original leaf or extent zeroout. |
| 3311 | */ | 3324 | */ |
| 3312 | ext4_ext_drop_refs(path); | 3325 | path = ext4_find_extent(inode, map->m_lblk, ppath, 0); |
| 3313 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | ||
| 3314 | if (IS_ERR(path)) | 3326 | if (IS_ERR(path)) |
| 3315 | return PTR_ERR(path); | 3327 | return PTR_ERR(path); |
| 3316 | depth = ext_depth(inode); | 3328 | depth = ext_depth(inode); |
| @@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle, | |||
| 3330 | split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | | 3342 | split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | |
| 3331 | EXT4_EXT_MARK_UNWRIT2); | 3343 | EXT4_EXT_MARK_UNWRIT2); |
| 3332 | } | 3344 | } |
| 3333 | err = ext4_split_extent_at(handle, inode, path, | 3345 | err = ext4_split_extent_at(handle, inode, ppath, |
| 3334 | map->m_lblk, split_flag1, flags); | 3346 | map->m_lblk, split_flag1, flags); |
| 3335 | if (err) | 3347 | if (err) |
| 3336 | goto out; | 3348 | goto out; |
| @@ -3364,9 +3376,10 @@ out: | |||
| 3364 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 3376 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
| 3365 | struct inode *inode, | 3377 | struct inode *inode, |
| 3366 | struct ext4_map_blocks *map, | 3378 | struct ext4_map_blocks *map, |
| 3367 | struct ext4_ext_path *path, | 3379 | struct ext4_ext_path **ppath, |
| 3368 | int flags) | 3380 | int flags) |
| 3369 | { | 3381 | { |
| 3382 | struct ext4_ext_path *path = *ppath; | ||
| 3370 | struct ext4_sb_info *sbi; | 3383 | struct ext4_sb_info *sbi; |
| 3371 | struct ext4_extent_header *eh; | 3384 | struct ext4_extent_header *eh; |
| 3372 | struct ext4_map_blocks split_map; | 3385 | struct ext4_map_blocks split_map; |
| @@ -3590,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
| 3590 | } | 3603 | } |
| 3591 | } | 3604 | } |
| 3592 | 3605 | ||
| 3593 | allocated = ext4_split_extent(handle, inode, path, | 3606 | err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, |
| 3594 | &split_map, split_flag, flags); | 3607 | flags); |
| 3595 | if (allocated < 0) | 3608 | if (err > 0) |
| 3596 | err = allocated; | 3609 | err = 0; |
| 3597 | |||
| 3598 | out: | 3610 | out: |
| 3599 | /* If we have gotten a failure, don't zero out status tree */ | 3611 | /* If we have gotten a failure, don't zero out status tree */ |
| 3600 | if (!err) | 3612 | if (!err) |
| @@ -3629,9 +3641,10 @@ out: | |||
| 3629 | static int ext4_split_convert_extents(handle_t *handle, | 3641 | static int ext4_split_convert_extents(handle_t *handle, |
| 3630 | struct inode *inode, | 3642 | struct inode *inode, |
| 3631 | struct ext4_map_blocks *map, | 3643 | struct ext4_map_blocks *map, |
| 3632 | struct ext4_ext_path *path, | 3644 | struct ext4_ext_path **ppath, |
| 3633 | int flags) | 3645 | int flags) |
| 3634 | { | 3646 | { |
| 3647 | struct ext4_ext_path *path = *ppath; | ||
| 3635 | ext4_lblk_t eof_block; | 3648 | ext4_lblk_t eof_block; |
| 3636 | ext4_lblk_t ee_block; | 3649 | ext4_lblk_t ee_block; |
| 3637 | struct ext4_extent *ex; | 3650 | struct ext4_extent *ex; |
| @@ -3665,74 +3678,15 @@ static int ext4_split_convert_extents(handle_t *handle, | |||
| 3665 | split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); | 3678 | split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); |
| 3666 | } | 3679 | } |
| 3667 | flags |= EXT4_GET_BLOCKS_PRE_IO; | 3680 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
| 3668 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); | 3681 | return ext4_split_extent(handle, inode, ppath, map, split_flag, flags); |
| 3669 | } | 3682 | } |
| 3670 | 3683 | ||
| 3671 | static int ext4_convert_initialized_extents(handle_t *handle, | ||
| 3672 | struct inode *inode, | ||
| 3673 | struct ext4_map_blocks *map, | ||
| 3674 | struct ext4_ext_path *path) | ||
| 3675 | { | ||
| 3676 | struct ext4_extent *ex; | ||
| 3677 | ext4_lblk_t ee_block; | ||
| 3678 | unsigned int ee_len; | ||
| 3679 | int depth; | ||
| 3680 | int err = 0; | ||
| 3681 | |||
| 3682 | depth = ext_depth(inode); | ||
| 3683 | ex = path[depth].p_ext; | ||
| 3684 | ee_block = le32_to_cpu(ex->ee_block); | ||
| 3685 | ee_len = ext4_ext_get_actual_len(ex); | ||
| 3686 | |||
| 3687 | ext_debug("%s: inode %lu, logical" | ||
| 3688 | "block %llu, max_blocks %u\n", __func__, inode->i_ino, | ||
| 3689 | (unsigned long long)ee_block, ee_len); | ||
| 3690 | |||
| 3691 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
| 3692 | err = ext4_split_convert_extents(handle, inode, map, path, | ||
| 3693 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); | ||
| 3694 | if (err < 0) | ||
| 3695 | goto out; | ||
| 3696 | ext4_ext_drop_refs(path); | ||
| 3697 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | ||
| 3698 | if (IS_ERR(path)) { | ||
| 3699 | err = PTR_ERR(path); | ||
| 3700 | goto out; | ||
| 3701 | } | ||
| 3702 | depth = ext_depth(inode); | ||
| 3703 | ex = path[depth].p_ext; | ||
| 3704 | if (!ex) { | ||
| 3705 | EXT4_ERROR_INODE(inode, "unexpected hole at %lu", | ||
| 3706 | (unsigned long) map->m_lblk); | ||
| 3707 | err = -EIO; | ||
| 3708 | goto out; | ||
| 3709 | } | ||
| 3710 | } | ||
| 3711 | |||
| 3712 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
| 3713 | if (err) | ||
| 3714 | goto out; | ||
| 3715 | /* first mark the extent as unwritten */ | ||
| 3716 | ext4_ext_mark_unwritten(ex); | ||
| 3717 | |||
| 3718 | /* note: ext4_ext_correct_indexes() isn't needed here because | ||
| 3719 | * borders are not changed | ||
| 3720 | */ | ||
| 3721 | ext4_ext_try_to_merge(handle, inode, path, ex); | ||
| 3722 | |||
| 3723 | /* Mark modified extent as dirty */ | ||
| 3724 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | ||
| 3725 | out: | ||
| 3726 | ext4_ext_show_leaf(inode, path); | ||
| 3727 | return err; | ||
| 3728 | } | ||
| 3729 | |||
| 3730 | |||
| 3731 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3684 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
| 3732 | struct inode *inode, | 3685 | struct inode *inode, |
| 3733 | struct ext4_map_blocks *map, | 3686 | struct ext4_map_blocks *map, |
| 3734 | struct ext4_ext_path *path) | 3687 | struct ext4_ext_path **ppath) |
| 3735 | { | 3688 | { |
| 3689 | struct ext4_ext_path *path = *ppath; | ||
| 3736 | struct ext4_extent *ex; | 3690 | struct ext4_extent *ex; |
| 3737 | ext4_lblk_t ee_block; | 3691 | ext4_lblk_t ee_block; |
| 3738 | unsigned int ee_len; | 3692 | unsigned int ee_len; |
| @@ -3761,16 +3715,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
| 3761 | inode->i_ino, (unsigned long long)ee_block, ee_len, | 3715 | inode->i_ino, (unsigned long long)ee_block, ee_len, |
| 3762 | (unsigned long long)map->m_lblk, map->m_len); | 3716 | (unsigned long long)map->m_lblk, map->m_len); |
| 3763 | #endif | 3717 | #endif |
| 3764 | err = ext4_split_convert_extents(handle, inode, map, path, | 3718 | err = ext4_split_convert_extents(handle, inode, map, ppath, |
| 3765 | EXT4_GET_BLOCKS_CONVERT); | 3719 | EXT4_GET_BLOCKS_CONVERT); |
| 3766 | if (err < 0) | 3720 | if (err < 0) |
| 3767 | goto out; | 3721 | return err; |
| 3768 | ext4_ext_drop_refs(path); | 3722 | path = ext4_find_extent(inode, map->m_lblk, ppath, 0); |
| 3769 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | 3723 | if (IS_ERR(path)) |
| 3770 | if (IS_ERR(path)) { | 3724 | return PTR_ERR(path); |
| 3771 | err = PTR_ERR(path); | ||
| 3772 | goto out; | ||
| 3773 | } | ||
| 3774 | depth = ext_depth(inode); | 3725 | depth = ext_depth(inode); |
| 3775 | ex = path[depth].p_ext; | 3726 | ex = path[depth].p_ext; |
| 3776 | } | 3727 | } |
| @@ -3963,12 +3914,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
| 3963 | } | 3914 | } |
| 3964 | 3915 | ||
| 3965 | static int | 3916 | static int |
| 3966 | ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, | 3917 | convert_initialized_extent(handle_t *handle, struct inode *inode, |
| 3967 | struct ext4_map_blocks *map, | 3918 | struct ext4_map_blocks *map, |
| 3968 | struct ext4_ext_path *path, int flags, | 3919 | struct ext4_ext_path **ppath, int flags, |
| 3969 | unsigned int allocated, ext4_fsblk_t newblock) | 3920 | unsigned int allocated, ext4_fsblk_t newblock) |
| 3970 | { | 3921 | { |
| 3971 | int ret = 0; | 3922 | struct ext4_ext_path *path = *ppath; |
| 3923 | struct ext4_extent *ex; | ||
| 3924 | ext4_lblk_t ee_block; | ||
| 3925 | unsigned int ee_len; | ||
| 3926 | int depth; | ||
| 3972 | int err = 0; | 3927 | int err = 0; |
| 3973 | 3928 | ||
| 3974 | /* | 3929 | /* |
| @@ -3978,28 +3933,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, | |||
| 3978 | if (map->m_len > EXT_UNWRITTEN_MAX_LEN) | 3933 | if (map->m_len > EXT_UNWRITTEN_MAX_LEN) |
| 3979 | map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; | 3934 | map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; |
| 3980 | 3935 | ||
| 3981 | ret = ext4_convert_initialized_extents(handle, inode, map, | 3936 | depth = ext_depth(inode); |
| 3982 | path); | 3937 | ex = path[depth].p_ext; |
| 3983 | if (ret >= 0) { | 3938 | ee_block = le32_to_cpu(ex->ee_block); |
| 3984 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3939 | ee_len = ext4_ext_get_actual_len(ex); |
| 3985 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | 3940 | |
| 3986 | path, map->m_len); | 3941 | ext_debug("%s: inode %lu, logical" |
| 3987 | } else | 3942 | "block %llu, max_blocks %u\n", __func__, inode->i_ino, |
| 3988 | err = ret; | 3943 | (unsigned long long)ee_block, ee_len); |
| 3944 | |||
| 3945 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
| 3946 | err = ext4_split_convert_extents(handle, inode, map, ppath, | ||
| 3947 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); | ||
| 3948 | if (err < 0) | ||
| 3949 | return err; | ||
| 3950 | path = ext4_find_extent(inode, map->m_lblk, ppath, 0); | ||
| 3951 | if (IS_ERR(path)) | ||
| 3952 | return PTR_ERR(path); | ||
| 3953 | depth = ext_depth(inode); | ||
| 3954 | ex = path[depth].p_ext; | ||
| 3955 | if (!ex) { | ||
| 3956 | EXT4_ERROR_INODE(inode, "unexpected hole at %lu", | ||
| 3957 | (unsigned long) map->m_lblk); | ||
| 3958 | return -EIO; | ||
| 3959 | } | ||
| 3960 | } | ||
| 3961 | |||
| 3962 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
| 3963 | if (err) | ||
| 3964 | return err; | ||
| 3965 | /* first mark the extent as unwritten */ | ||
| 3966 | ext4_ext_mark_unwritten(ex); | ||
| 3967 | |||
| 3968 | /* note: ext4_ext_correct_indexes() isn't needed here because | ||
| 3969 | * borders are not changed | ||
| 3970 | */ | ||
| 3971 | ext4_ext_try_to_merge(handle, inode, path, ex); | ||
| 3972 | |||
| 3973 | /* Mark modified extent as dirty */ | ||
| 3974 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | ||
| 3975 | if (err) | ||
| 3976 | return err; | ||
| 3977 | ext4_ext_show_leaf(inode, path); | ||
| 3978 | |||
| 3979 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
| 3980 | err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len); | ||
| 3981 | if (err) | ||
| 3982 | return err; | ||
| 3989 | map->m_flags |= EXT4_MAP_UNWRITTEN; | 3983 | map->m_flags |= EXT4_MAP_UNWRITTEN; |
| 3990 | if (allocated > map->m_len) | 3984 | if (allocated > map->m_len) |
| 3991 | allocated = map->m_len; | 3985 | allocated = map->m_len; |
| 3992 | map->m_len = allocated; | 3986 | map->m_len = allocated; |
| 3993 | 3987 | return allocated; | |
| 3994 | return err ? err : allocated; | ||
| 3995 | } | 3988 | } |
| 3996 | 3989 | ||
| 3997 | static int | 3990 | static int |
| 3998 | ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | 3991 | ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, |
| 3999 | struct ext4_map_blocks *map, | 3992 | struct ext4_map_blocks *map, |
| 4000 | struct ext4_ext_path *path, int flags, | 3993 | struct ext4_ext_path **ppath, int flags, |
| 4001 | unsigned int allocated, ext4_fsblk_t newblock) | 3994 | unsigned int allocated, ext4_fsblk_t newblock) |
| 4002 | { | 3995 | { |
| 3996 | struct ext4_ext_path *path = *ppath; | ||
| 4003 | int ret = 0; | 3997 | int ret = 0; |
| 4004 | int err = 0; | 3998 | int err = 0; |
| 4005 | ext4_io_end_t *io = ext4_inode_aio(inode); | 3999 | ext4_io_end_t *io = ext4_inode_aio(inode); |
| @@ -4021,8 +4015,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | |||
| 4021 | 4015 | ||
| 4022 | /* get_block() before submit the IO, split the extent */ | 4016 | /* get_block() before submit the IO, split the extent */ |
| 4023 | if (flags & EXT4_GET_BLOCKS_PRE_IO) { | 4017 | if (flags & EXT4_GET_BLOCKS_PRE_IO) { |
| 4024 | ret = ext4_split_convert_extents(handle, inode, map, | 4018 | ret = ext4_split_convert_extents(handle, inode, map, ppath, |
| 4025 | path, flags | EXT4_GET_BLOCKS_CONVERT); | 4019 | flags | EXT4_GET_BLOCKS_CONVERT); |
| 4026 | if (ret <= 0) | 4020 | if (ret <= 0) |
| 4027 | goto out; | 4021 | goto out; |
| 4028 | /* | 4022 | /* |
| @@ -4040,7 +4034,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | |||
| 4040 | /* IO end_io complete, convert the filled extent to written */ | 4034 | /* IO end_io complete, convert the filled extent to written */ |
| 4041 | if (flags & EXT4_GET_BLOCKS_CONVERT) { | 4035 | if (flags & EXT4_GET_BLOCKS_CONVERT) { |
| 4042 | ret = ext4_convert_unwritten_extents_endio(handle, inode, map, | 4036 | ret = ext4_convert_unwritten_extents_endio(handle, inode, map, |
| 4043 | path); | 4037 | ppath); |
| 4044 | if (ret >= 0) { | 4038 | if (ret >= 0) { |
| 4045 | ext4_update_inode_fsync_trans(handle, inode, 1); | 4039 | ext4_update_inode_fsync_trans(handle, inode, 1); |
| 4046 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | 4040 | err = check_eofblocks_fl(handle, inode, map->m_lblk, |
| @@ -4078,7 +4072,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, | |||
| 4078 | } | 4072 | } |
| 4079 | 4073 | ||
| 4080 | /* buffered write, writepage time, convert*/ | 4074 | /* buffered write, writepage time, convert*/ |
| 4081 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags); | 4075 | ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags); |
| 4082 | if (ret >= 0) | 4076 | if (ret >= 0) |
| 4083 | ext4_update_inode_fsync_trans(handle, inode, 1); | 4077 | ext4_update_inode_fsync_trans(handle, inode, 1); |
| 4084 | out: | 4078 | out: |
| @@ -4279,7 +4273,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4279 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 4273 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
| 4280 | 4274 | ||
| 4281 | /* find extent for this block */ | 4275 | /* find extent for this block */ |
| 4282 | path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0); | 4276 | path = ext4_find_extent(inode, map->m_lblk, NULL, 0); |
| 4283 | if (IS_ERR(path)) { | 4277 | if (IS_ERR(path)) { |
| 4284 | err = PTR_ERR(path); | 4278 | err = PTR_ERR(path); |
| 4285 | path = NULL; | 4279 | path = NULL; |
| @@ -4291,7 +4285,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4291 | /* | 4285 | /* |
| 4292 | * consistent leaf must not be empty; | 4286 | * consistent leaf must not be empty; |
| 4293 | * this situation is possible, though, _during_ tree modification; | 4287 | * this situation is possible, though, _during_ tree modification; |
| 4294 | * this is why assert can't be put in ext4_ext_find_extent() | 4288 | * this is why assert can't be put in ext4_find_extent() |
| 4295 | */ | 4289 | */ |
| 4296 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { | 4290 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { |
| 4297 | EXT4_ERROR_INODE(inode, "bad extent address " | 4291 | EXT4_ERROR_INODE(inode, "bad extent address " |
| @@ -4331,15 +4325,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4331 | */ | 4325 | */ |
| 4332 | if ((!ext4_ext_is_unwritten(ex)) && | 4326 | if ((!ext4_ext_is_unwritten(ex)) && |
| 4333 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { | 4327 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { |
| 4334 | allocated = ext4_ext_convert_initialized_extent( | 4328 | allocated = convert_initialized_extent( |
| 4335 | handle, inode, map, path, flags, | 4329 | handle, inode, map, &path, |
| 4336 | allocated, newblock); | 4330 | flags, allocated, newblock); |
| 4337 | goto out2; | 4331 | goto out2; |
| 4338 | } else if (!ext4_ext_is_unwritten(ex)) | 4332 | } else if (!ext4_ext_is_unwritten(ex)) |
| 4339 | goto out; | 4333 | goto out; |
| 4340 | 4334 | ||
| 4341 | ret = ext4_ext_handle_unwritten_extents( | 4335 | ret = ext4_ext_handle_unwritten_extents( |
| 4342 | handle, inode, map, path, flags, | 4336 | handle, inode, map, &path, flags, |
| 4343 | allocated, newblock); | 4337 | allocated, newblock); |
| 4344 | if (ret < 0) | 4338 | if (ret < 0) |
| 4345 | err = ret; | 4339 | err = ret; |
| @@ -4376,7 +4370,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4376 | 4370 | ||
| 4377 | /* | 4371 | /* |
| 4378 | * If we are doing bigalloc, check to see if the extent returned | 4372 | * If we are doing bigalloc, check to see if the extent returned |
| 4379 | * by ext4_ext_find_extent() implies a cluster we can use. | 4373 | * by ext4_find_extent() implies a cluster we can use. |
| 4380 | */ | 4374 | */ |
| 4381 | if (cluster_offset && ex && | 4375 | if (cluster_offset && ex && |
| 4382 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { | 4376 | get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { |
| @@ -4451,6 +4445,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
| 4451 | ar.flags = 0; | 4445 | ar.flags = 0; |
| 4452 | if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) | 4446 | if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) |
| 4453 | ar.flags |= EXT4_MB_HINT_NOPREALLOC; | 4447 | ar.flags |= EXT4_MB_HINT_NOPREALLOC; |
| 4448 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
| 4449 | ar.flags |= EXT4_MB_DELALLOC_RESERVED; | ||
| 4454 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 4450 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
| 4455 | if (!newblock) | 4451 | if (!newblock) |
| 4456 | goto out2; | 4452 | goto out2; |
| @@ -4486,7 +4482,7 @@ got_allocated_blocks: | |||
| 4486 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | 4482 | err = check_eofblocks_fl(handle, inode, map->m_lblk, |
| 4487 | path, ar.len); | 4483 | path, ar.len); |
| 4488 | if (!err) | 4484 | if (!err) |
| 4489 | err = ext4_ext_insert_extent(handle, inode, path, | 4485 | err = ext4_ext_insert_extent(handle, inode, &path, |
| 4490 | &newex, flags); | 4486 | &newex, flags); |
| 4491 | 4487 | ||
| 4492 | if (!err && set_unwritten) { | 4488 | if (!err && set_unwritten) { |
| @@ -4619,10 +4615,8 @@ out: | |||
| 4619 | map->m_pblk = newblock; | 4615 | map->m_pblk = newblock; |
| 4620 | map->m_len = allocated; | 4616 | map->m_len = allocated; |
| 4621 | out2: | 4617 | out2: |
| 4622 | if (path) { | 4618 | ext4_ext_drop_refs(path); |
| 4623 | ext4_ext_drop_refs(path); | 4619 | kfree(path); |
| 4624 | kfree(path); | ||
| 4625 | } | ||
| 4626 | 4620 | ||
| 4627 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4621 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
| 4628 | err ? err : allocated); | 4622 | err ? err : allocated); |
| @@ -4799,7 +4793,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
| 4799 | max_blocks -= lblk; | 4793 | max_blocks -= lblk; |
| 4800 | 4794 | ||
| 4801 | flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | | 4795 | flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | |
| 4802 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; | 4796 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | |
| 4797 | EXT4_EX_NOCACHE; | ||
| 4803 | if (mode & FALLOC_FL_KEEP_SIZE) | 4798 | if (mode & FALLOC_FL_KEEP_SIZE) |
| 4804 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | 4799 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; |
| 4805 | 4800 | ||
| @@ -4837,15 +4832,21 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
| 4837 | ext4_inode_block_unlocked_dio(inode); | 4832 | ext4_inode_block_unlocked_dio(inode); |
| 4838 | inode_dio_wait(inode); | 4833 | inode_dio_wait(inode); |
| 4839 | 4834 | ||
| 4835 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | ||
| 4836 | flags, mode); | ||
| 4837 | if (ret) | ||
| 4838 | goto out_dio; | ||
| 4840 | /* | 4839 | /* |
| 4841 | * Remove entire range from the extent status tree. | 4840 | * Remove entire range from the extent status tree. |
| 4841 | * | ||
| 4842 | * ext4_es_remove_extent(inode, lblk, max_blocks) is | ||
| 4843 | * NOT sufficient. I'm not sure why this is the case, | ||
| 4844 | * but let's be conservative and remove the extent | ||
| 4845 | * status tree for the entire inode. There should be | ||
| 4846 | * no outstanding delalloc extents thanks to the | ||
| 4847 | * filemap_write_and_wait_range() call above. | ||
| 4842 | */ | 4848 | */ |
| 4843 | ret = ext4_es_remove_extent(inode, lblk, max_blocks); | 4849 | ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
| 4844 | if (ret) | ||
| 4845 | goto out_dio; | ||
| 4846 | |||
| 4847 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, | ||
| 4848 | flags, mode); | ||
| 4849 | if (ret) | 4850 | if (ret) |
| 4850 | goto out_dio; | 4851 | goto out_dio; |
| 4851 | } | 4852 | } |
| @@ -5304,36 +5305,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
| 5304 | struct ext4_ext_path *path; | 5305 | struct ext4_ext_path *path; |
| 5305 | int ret = 0, depth; | 5306 | int ret = 0, depth; |
| 5306 | struct ext4_extent *extent; | 5307 | struct ext4_extent *extent; |
| 5307 | ext4_lblk_t stop_block, current_block; | 5308 | ext4_lblk_t stop_block; |
| 5308 | ext4_lblk_t ex_start, ex_end; | 5309 | ext4_lblk_t ex_start, ex_end; |
| 5309 | 5310 | ||
| 5310 | /* Let path point to the last extent */ | 5311 | /* Let path point to the last extent */ |
| 5311 | path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); | 5312 | path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); |
| 5312 | if (IS_ERR(path)) | 5313 | if (IS_ERR(path)) |
| 5313 | return PTR_ERR(path); | 5314 | return PTR_ERR(path); |
| 5314 | 5315 | ||
| 5315 | depth = path->p_depth; | 5316 | depth = path->p_depth; |
| 5316 | extent = path[depth].p_ext; | 5317 | extent = path[depth].p_ext; |
| 5317 | if (!extent) { | 5318 | if (!extent) |
| 5318 | ext4_ext_drop_refs(path); | 5319 | goto out; |
| 5319 | kfree(path); | ||
| 5320 | return ret; | ||
| 5321 | } | ||
| 5322 | 5320 | ||
| 5323 | stop_block = le32_to_cpu(extent->ee_block) + | 5321 | stop_block = le32_to_cpu(extent->ee_block) + |
| 5324 | ext4_ext_get_actual_len(extent); | 5322 | ext4_ext_get_actual_len(extent); |
| 5325 | ext4_ext_drop_refs(path); | ||
| 5326 | kfree(path); | ||
| 5327 | 5323 | ||
| 5328 | /* Nothing to shift, if hole is at the end of file */ | 5324 | /* Nothing to shift, if hole is at the end of file */ |
| 5329 | if (start >= stop_block) | 5325 | if (start >= stop_block) |
| 5330 | return ret; | 5326 | goto out; |
| 5331 | 5327 | ||
| 5332 | /* | 5328 | /* |
| 5333 | * Don't start shifting extents until we make sure the hole is big | 5329 | * Don't start shifting extents until we make sure the hole is big |
| 5334 | * enough to accomodate the shift. | 5330 | * enough to accomodate the shift. |
| 5335 | */ | 5331 | */ |
| 5336 | path = ext4_ext_find_extent(inode, start - 1, NULL, 0); | 5332 | path = ext4_find_extent(inode, start - 1, &path, 0); |
| 5337 | if (IS_ERR(path)) | 5333 | if (IS_ERR(path)) |
| 5338 | return PTR_ERR(path); | 5334 | return PTR_ERR(path); |
| 5339 | depth = path->p_depth; | 5335 | depth = path->p_depth; |
| @@ -5346,8 +5342,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
| 5346 | ex_start = 0; | 5342 | ex_start = 0; |
| 5347 | ex_end = 0; | 5343 | ex_end = 0; |
| 5348 | } | 5344 | } |
| 5349 | ext4_ext_drop_refs(path); | ||
| 5350 | kfree(path); | ||
| 5351 | 5345 | ||
| 5352 | if ((start == ex_start && shift > ex_start) || | 5346 | if ((start == ex_start && shift > ex_start) || |
| 5353 | (shift > start - ex_end)) | 5347 | (shift > start - ex_end)) |
| @@ -5355,7 +5349,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
| 5355 | 5349 | ||
| 5356 | /* Its safe to start updating extents */ | 5350 | /* Its safe to start updating extents */ |
| 5357 | while (start < stop_block) { | 5351 | while (start < stop_block) { |
| 5358 | path = ext4_ext_find_extent(inode, start, NULL, 0); | 5352 | path = ext4_find_extent(inode, start, &path, 0); |
| 5359 | if (IS_ERR(path)) | 5353 | if (IS_ERR(path)) |
| 5360 | return PTR_ERR(path); | 5354 | return PTR_ERR(path); |
| 5361 | depth = path->p_depth; | 5355 | depth = path->p_depth; |
| @@ -5365,27 +5359,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | |||
| 5365 | (unsigned long) start); | 5359 | (unsigned long) start); |
| 5366 | return -EIO; | 5360 | return -EIO; |
| 5367 | } | 5361 | } |
| 5368 | 5362 | if (start > le32_to_cpu(extent->ee_block)) { | |
| 5369 | current_block = le32_to_cpu(extent->ee_block); | ||
| 5370 | if (start > current_block) { | ||
| 5371 | /* Hole, move to the next extent */ | 5363 | /* Hole, move to the next extent */ |
| 5372 | ret = mext_next_extent(inode, path, &extent); | 5364 | if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) { |
| 5373 | if (ret != 0) { | 5365 | path[depth].p_ext++; |
| 5374 | ext4_ext_drop_refs(path); | 5366 | } else { |
| 5375 | kfree(path); | 5367 | start = ext4_ext_next_allocated_block(path); |
| 5376 | if (ret == 1) | 5368 | continue; |
| 5377 | ret = 0; | ||
| 5378 | break; | ||
| 5379 | } | 5369 | } |
| 5380 | } | 5370 | } |
| 5381 | ret = ext4_ext_shift_path_extents(path, shift, inode, | 5371 | ret = ext4_ext_shift_path_extents(path, shift, inode, |
| 5382 | handle, &start); | 5372 | handle, &start); |
| 5383 | ext4_ext_drop_refs(path); | ||
| 5384 | kfree(path); | ||
| 5385 | if (ret) | 5373 | if (ret) |
| 5386 | break; | 5374 | break; |
| 5387 | } | 5375 | } |
| 5388 | 5376 | out: | |
| 5377 | ext4_ext_drop_refs(path); | ||
| 5378 | kfree(path); | ||
| 5389 | return ret; | 5379 | return ret; |
| 5390 | } | 5380 | } |
| 5391 | 5381 | ||
| @@ -5508,3 +5498,199 @@ out_mutex: | |||
| 5508 | mutex_unlock(&inode->i_mutex); | 5498 | mutex_unlock(&inode->i_mutex); |
| 5509 | return ret; | 5499 | return ret; |
| 5510 | } | 5500 | } |
| 5501 | |||
| 5502 | /** | ||
| 5503 | * ext4_swap_extents - Swap extents between two inodes | ||
| 5504 | * | ||
| 5505 | * @inode1: First inode | ||
| 5506 | * @inode2: Second inode | ||
| 5507 | * @lblk1: Start block for first inode | ||
| 5508 | * @lblk2: Start block for second inode | ||
| 5509 | * @count: Number of blocks to swap | ||
| 5510 | * @mark_unwritten: Mark second inode's extents as unwritten after swap | ||
| 5511 | * @erp: Pointer to save error value | ||
| 5512 | * | ||
| 5513 | * This helper routine does exactly what is promise "swap extents". All other | ||
| 5514 | * stuff such as page-cache locking consistency, bh mapping consistency or | ||
| 5515 | * extent's data copying must be performed by caller. | ||
| 5516 | * Locking: | ||
| 5517 | * i_mutex is held for both inodes | ||
| 5518 | * i_data_sem is locked for write for both inodes | ||
| 5519 | * Assumptions: | ||
| 5520 | * All pages from requested range are locked for both inodes | ||
| 5521 | */ | ||
| 5522 | int | ||
| 5523 | ext4_swap_extents(handle_t *handle, struct inode *inode1, | ||
| 5524 | struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, | ||
| 5525 | ext4_lblk_t count, int unwritten, int *erp) | ||
| 5526 | { | ||
| 5527 | struct ext4_ext_path *path1 = NULL; | ||
| 5528 | struct ext4_ext_path *path2 = NULL; | ||
| 5529 | int replaced_count = 0; | ||
| 5530 | |||
| 5531 | BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem)); | ||
| 5532 | BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem)); | ||
| 5533 | BUG_ON(!mutex_is_locked(&inode1->i_mutex)); | ||
| 5534 | BUG_ON(!mutex_is_locked(&inode1->i_mutex)); | ||
| 5535 | |||
| 5536 | *erp = ext4_es_remove_extent(inode1, lblk1, count); | ||
| 5537 | if (unlikely(*erp)) | ||
| 5538 | return 0; | ||
| 5539 | *erp = ext4_es_remove_extent(inode2, lblk2, count); | ||
| 5540 | if (unlikely(*erp)) | ||
| 5541 | return 0; | ||
| 5542 | |||
| 5543 | while (count) { | ||
| 5544 | struct ext4_extent *ex1, *ex2, tmp_ex; | ||
| 5545 | ext4_lblk_t e1_blk, e2_blk; | ||
| 5546 | int e1_len, e2_len, len; | ||
| 5547 | int split = 0; | ||
| 5548 | |||
| 5549 | path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); | ||
| 5550 | if (unlikely(IS_ERR(path1))) { | ||
| 5551 | *erp = PTR_ERR(path1); | ||
| 5552 | path1 = NULL; | ||
| 5553 | finish: | ||
| 5554 | count = 0; | ||
| 5555 | goto repeat; | ||
| 5556 | } | ||
| 5557 | path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); | ||
| 5558 | if (unlikely(IS_ERR(path2))) { | ||
| 5559 | *erp = PTR_ERR(path2); | ||
| 5560 | path2 = NULL; | ||
| 5561 | goto finish; | ||
| 5562 | } | ||
| 5563 | ex1 = path1[path1->p_depth].p_ext; | ||
| 5564 | ex2 = path2[path2->p_depth].p_ext; | ||
| 5565 | /* Do we have somthing to swap ? */ | ||
| 5566 | if (unlikely(!ex2 || !ex1)) | ||
| 5567 | goto finish; | ||
| 5568 | |||
| 5569 | e1_blk = le32_to_cpu(ex1->ee_block); | ||
| 5570 | e2_blk = le32_to_cpu(ex2->ee_block); | ||
| 5571 | e1_len = ext4_ext_get_actual_len(ex1); | ||
| 5572 | e2_len = ext4_ext_get_actual_len(ex2); | ||
| 5573 | |||
| 5574 | /* Hole handling */ | ||
| 5575 | if (!in_range(lblk1, e1_blk, e1_len) || | ||
| 5576 | !in_range(lblk2, e2_blk, e2_len)) { | ||
| 5577 | ext4_lblk_t next1, next2; | ||
| 5578 | |||
| 5579 | /* if hole after extent, then go to next extent */ | ||
| 5580 | next1 = ext4_ext_next_allocated_block(path1); | ||
| 5581 | next2 = ext4_ext_next_allocated_block(path2); | ||
| 5582 | /* If hole before extent, then shift to that extent */ | ||
| 5583 | if (e1_blk > lblk1) | ||
| 5584 | next1 = e1_blk; | ||
| 5585 | if (e2_blk > lblk2) | ||
| 5586 | next2 = e1_blk; | ||
| 5587 | /* Do we have something to swap */ | ||
| 5588 | if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) | ||
| 5589 | goto finish; | ||
| 5590 | /* Move to the rightest boundary */ | ||
| 5591 | len = next1 - lblk1; | ||
| 5592 | if (len < next2 - lblk2) | ||
| 5593 | len = next2 - lblk2; | ||
| 5594 | if (len > count) | ||
| 5595 | len = count; | ||
| 5596 | lblk1 += len; | ||
| 5597 | lblk2 += len; | ||
| 5598 | count -= len; | ||
| 5599 | goto repeat; | ||
| 5600 | } | ||
| 5601 | |||
| 5602 | /* Prepare left boundary */ | ||
| 5603 | if (e1_blk < lblk1) { | ||
| 5604 | split = 1; | ||
| 5605 | *erp = ext4_force_split_extent_at(handle, inode1, | ||
| 5606 | &path1, lblk1, 0); | ||
| 5607 | if (unlikely(*erp)) | ||
| 5608 | goto finish; | ||
| 5609 | } | ||
| 5610 | if (e2_blk < lblk2) { | ||
| 5611 | split = 1; | ||
| 5612 | *erp = ext4_force_split_extent_at(handle, inode2, | ||
| 5613 | &path2, lblk2, 0); | ||
| 5614 | if (unlikely(*erp)) | ||
| 5615 | goto finish; | ||
| 5616 | } | ||
| 5617 | /* ext4_split_extent_at() may result in leaf extent split, | ||
| 5618 | * path must to be revalidated. */ | ||
| 5619 | if (split) | ||
| 5620 | goto repeat; | ||
| 5621 | |||
| 5622 | /* Prepare right boundary */ | ||
| 5623 | len = count; | ||
| 5624 | if (len > e1_blk + e1_len - lblk1) | ||
| 5625 | len = e1_blk + e1_len - lblk1; | ||
| 5626 | if (len > e2_blk + e2_len - lblk2) | ||
| 5627 | len = e2_blk + e2_len - lblk2; | ||
| 5628 | |||
| 5629 | if (len != e1_len) { | ||
| 5630 | split = 1; | ||
| 5631 | *erp = ext4_force_split_extent_at(handle, inode1, | ||
| 5632 | &path1, lblk1 + len, 0); | ||
| 5633 | if (unlikely(*erp)) | ||
| 5634 | goto finish; | ||
| 5635 | } | ||
| 5636 | if (len != e2_len) { | ||
| 5637 | split = 1; | ||
| 5638 | *erp = ext4_force_split_extent_at(handle, inode2, | ||
| 5639 | &path2, lblk2 + len, 0); | ||
| 5640 | if (*erp) | ||
| 5641 | goto finish; | ||
| 5642 | } | ||
| 5643 | /* ext4_split_extent_at() may result in leaf extent split, | ||
| 5644 | * path must to be revalidated. */ | ||
| 5645 | if (split) | ||
| 5646 | goto repeat; | ||
| 5647 | |||
| 5648 | BUG_ON(e2_len != e1_len); | ||
| 5649 | *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth); | ||
| 5650 | if (unlikely(*erp)) | ||
| 5651 | goto finish; | ||
| 5652 | *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth); | ||
| 5653 | if (unlikely(*erp)) | ||
| 5654 | goto finish; | ||
| 5655 | |||
| 5656 | /* Both extents are fully inside boundaries. Swap it now */ | ||
| 5657 | tmp_ex = *ex1; | ||
| 5658 | ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2)); | ||
| 5659 | ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex)); | ||
| 5660 | ex1->ee_len = cpu_to_le16(e2_len); | ||
| 5661 | ex2->ee_len = cpu_to_le16(e1_len); | ||
| 5662 | if (unwritten) | ||
| 5663 | ext4_ext_mark_unwritten(ex2); | ||
| 5664 | if (ext4_ext_is_unwritten(&tmp_ex)) | ||
| 5665 | ext4_ext_mark_unwritten(ex1); | ||
| 5666 | |||
| 5667 | ext4_ext_try_to_merge(handle, inode2, path2, ex2); | ||
| 5668 | ext4_ext_try_to_merge(handle, inode1, path1, ex1); | ||
| 5669 | *erp = ext4_ext_dirty(handle, inode2, path2 + | ||
| 5670 | path2->p_depth); | ||
| 5671 | if (unlikely(*erp)) | ||
| 5672 | goto finish; | ||
| 5673 | *erp = ext4_ext_dirty(handle, inode1, path1 + | ||
| 5674 | path1->p_depth); | ||
| 5675 | /* | ||
| 5676 | * Looks scarry ah..? second inode already points to new blocks, | ||
| 5677 | * and it was successfully dirtied. But luckily error may happen | ||
| 5678 | * only due to journal error, so full transaction will be | ||
| 5679 | * aborted anyway. | ||
| 5680 | */ | ||
| 5681 | if (unlikely(*erp)) | ||
| 5682 | goto finish; | ||
| 5683 | lblk1 += len; | ||
| 5684 | lblk2 += len; | ||
| 5685 | replaced_count += len; | ||
| 5686 | count -= len; | ||
| 5687 | |||
| 5688 | repeat: | ||
| 5689 | ext4_ext_drop_refs(path1); | ||
| 5690 | kfree(path1); | ||
| 5691 | ext4_ext_drop_refs(path2); | ||
| 5692 | kfree(path2); | ||
| 5693 | path1 = path2 = NULL; | ||
| 5694 | } | ||
| 5695 | return replaced_count; | ||
| 5696 | } | ||
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 0b7e28e7eaa4..94e7855ae71b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | */ | 11 | */ |
| 12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
| 13 | #include <linux/list_sort.h> | 13 | #include <linux/list_sort.h> |
| 14 | #include <linux/proc_fs.h> | ||
| 15 | #include <linux/seq_file.h> | ||
| 14 | #include "ext4.h" | 16 | #include "ext4.h" |
| 15 | #include "extents_status.h" | 17 | #include "extents_status.h" |
| 16 | 18 | ||
| @@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | |||
| 313 | */ | 315 | */ |
| 314 | if (!ext4_es_is_delayed(es)) { | 316 | if (!ext4_es_is_delayed(es)) { |
| 315 | EXT4_I(inode)->i_es_lru_nr++; | 317 | EXT4_I(inode)->i_es_lru_nr++; |
| 316 | percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); | 318 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> |
| 319 | s_es_stats.es_stats_lru_cnt); | ||
| 317 | } | 320 | } |
| 318 | 321 | ||
| 322 | EXT4_I(inode)->i_es_all_nr++; | ||
| 323 | percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | ||
| 324 | |||
| 319 | return es; | 325 | return es; |
| 320 | } | 326 | } |
| 321 | 327 | ||
| 322 | static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | 328 | static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) |
| 323 | { | 329 | { |
| 330 | EXT4_I(inode)->i_es_all_nr--; | ||
| 331 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | ||
| 332 | |||
| 324 | /* Decrease the lru counter when this es is not delayed */ | 333 | /* Decrease the lru counter when this es is not delayed */ |
| 325 | if (!ext4_es_is_delayed(es)) { | 334 | if (!ext4_es_is_delayed(es)) { |
| 326 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | 335 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); |
| 327 | EXT4_I(inode)->i_es_lru_nr--; | 336 | EXT4_I(inode)->i_es_lru_nr--; |
| 328 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); | 337 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> |
| 338 | s_es_stats.es_stats_lru_cnt); | ||
| 329 | } | 339 | } |
| 330 | 340 | ||
| 331 | kmem_cache_free(ext4_es_cachep, es); | 341 | kmem_cache_free(ext4_es_cachep, es); |
| @@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
| 426 | unsigned short ee_len; | 436 | unsigned short ee_len; |
| 427 | int depth, ee_status, es_status; | 437 | int depth, ee_status, es_status; |
| 428 | 438 | ||
| 429 | path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); | 439 | path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); |
| 430 | if (IS_ERR(path)) | 440 | if (IS_ERR(path)) |
| 431 | return; | 441 | return; |
| 432 | 442 | ||
| @@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
| 499 | } | 509 | } |
| 500 | } | 510 | } |
| 501 | out: | 511 | out: |
| 502 | if (path) { | 512 | ext4_ext_drop_refs(path); |
| 503 | ext4_ext_drop_refs(path); | 513 | kfree(path); |
| 504 | kfree(path); | ||
| 505 | } | ||
| 506 | } | 514 | } |
| 507 | 515 | ||
| 508 | static void ext4_es_insert_extent_ind_check(struct inode *inode, | 516 | static void ext4_es_insert_extent_ind_check(struct inode *inode, |
| @@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 731 | struct extent_status *es) | 739 | struct extent_status *es) |
| 732 | { | 740 | { |
| 733 | struct ext4_es_tree *tree; | 741 | struct ext4_es_tree *tree; |
| 742 | struct ext4_es_stats *stats; | ||
| 734 | struct extent_status *es1 = NULL; | 743 | struct extent_status *es1 = NULL; |
| 735 | struct rb_node *node; | 744 | struct rb_node *node; |
| 736 | int found = 0; | 745 | int found = 0; |
| @@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 767 | } | 776 | } |
| 768 | 777 | ||
| 769 | out: | 778 | out: |
| 779 | stats = &EXT4_SB(inode->i_sb)->s_es_stats; | ||
| 770 | if (found) { | 780 | if (found) { |
| 771 | BUG_ON(!es1); | 781 | BUG_ON(!es1); |
| 772 | es->es_lblk = es1->es_lblk; | 782 | es->es_lblk = es1->es_lblk; |
| 773 | es->es_len = es1->es_len; | 783 | es->es_len = es1->es_len; |
| 774 | es->es_pblk = es1->es_pblk; | 784 | es->es_pblk = es1->es_pblk; |
| 785 | stats->es_stats_cache_hits++; | ||
| 786 | } else { | ||
| 787 | stats->es_stats_cache_misses++; | ||
| 775 | } | 788 | } |
| 776 | 789 | ||
| 777 | read_unlock(&EXT4_I(inode)->i_es_lock); | 790 | read_unlock(&EXT4_I(inode)->i_es_lock); |
| @@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | |||
| 933 | struct ext4_inode_info *locked_ei) | 946 | struct ext4_inode_info *locked_ei) |
| 934 | { | 947 | { |
| 935 | struct ext4_inode_info *ei; | 948 | struct ext4_inode_info *ei; |
| 949 | struct ext4_es_stats *es_stats; | ||
| 936 | struct list_head *cur, *tmp; | 950 | struct list_head *cur, *tmp; |
| 937 | LIST_HEAD(skipped); | 951 | LIST_HEAD(skipped); |
| 952 | ktime_t start_time; | ||
| 953 | u64 scan_time; | ||
| 938 | int nr_shrunk = 0; | 954 | int nr_shrunk = 0; |
| 939 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 955 | int retried = 0, skip_precached = 1, nr_skipped = 0; |
| 940 | 956 | ||
| 957 | es_stats = &sbi->s_es_stats; | ||
| 958 | start_time = ktime_get(); | ||
| 941 | spin_lock(&sbi->s_es_lru_lock); | 959 | spin_lock(&sbi->s_es_lru_lock); |
| 942 | 960 | ||
| 943 | retry: | 961 | retry: |
| @@ -948,7 +966,8 @@ retry: | |||
| 948 | * If we have already reclaimed all extents from extent | 966 | * If we have already reclaimed all extents from extent |
| 949 | * status tree, just stop the loop immediately. | 967 | * status tree, just stop the loop immediately. |
| 950 | */ | 968 | */ |
| 951 | if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | 969 | if (percpu_counter_read_positive( |
| 970 | &es_stats->es_stats_lru_cnt) == 0) | ||
| 952 | break; | 971 | break; |
| 953 | 972 | ||
| 954 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 973 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
| @@ -958,7 +977,7 @@ retry: | |||
| 958 | * time. Normally we try hard to avoid shrinking | 977 | * time. Normally we try hard to avoid shrinking |
| 959 | * precached inodes, but we will as a last resort. | 978 | * precached inodes, but we will as a last resort. |
| 960 | */ | 979 | */ |
| 961 | if ((sbi->s_es_last_sorted < ei->i_touch_when) || | 980 | if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || |
| 962 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | 981 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, |
| 963 | EXT4_STATE_EXT_PRECACHED))) { | 982 | EXT4_STATE_EXT_PRECACHED))) { |
| 964 | nr_skipped++; | 983 | nr_skipped++; |
| @@ -992,7 +1011,7 @@ retry: | |||
| 992 | if ((nr_shrunk == 0) && nr_skipped && !retried) { | 1011 | if ((nr_shrunk == 0) && nr_skipped && !retried) { |
| 993 | retried++; | 1012 | retried++; |
| 994 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | 1013 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); |
| 995 | sbi->s_es_last_sorted = jiffies; | 1014 | es_stats->es_stats_last_sorted = jiffies; |
| 996 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | 1015 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, |
| 997 | i_es_lru); | 1016 | i_es_lru); |
| 998 | /* | 1017 | /* |
| @@ -1010,6 +1029,22 @@ retry: | |||
| 1010 | if (locked_ei && nr_shrunk == 0) | 1029 | if (locked_ei && nr_shrunk == 0) |
| 1011 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); | 1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); |
| 1012 | 1031 | ||
| 1032 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | ||
| 1033 | if (likely(es_stats->es_stats_scan_time)) | ||
| 1034 | es_stats->es_stats_scan_time = (scan_time + | ||
| 1035 | es_stats->es_stats_scan_time*3) / 4; | ||
| 1036 | else | ||
| 1037 | es_stats->es_stats_scan_time = scan_time; | ||
| 1038 | if (scan_time > es_stats->es_stats_max_scan_time) | ||
| 1039 | es_stats->es_stats_max_scan_time = scan_time; | ||
| 1040 | if (likely(es_stats->es_stats_shrunk)) | ||
| 1041 | es_stats->es_stats_shrunk = (nr_shrunk + | ||
| 1042 | es_stats->es_stats_shrunk*3) / 4; | ||
| 1043 | else | ||
| 1044 | es_stats->es_stats_shrunk = nr_shrunk; | ||
| 1045 | |||
| 1046 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, | ||
| 1047 | nr_skipped, retried); | ||
| 1013 | return nr_shrunk; | 1048 | return nr_shrunk; |
| 1014 | } | 1049 | } |
| 1015 | 1050 | ||
| @@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink, | |||
| 1020 | struct ext4_sb_info *sbi; | 1055 | struct ext4_sb_info *sbi; |
| 1021 | 1056 | ||
| 1022 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | 1057 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); |
| 1023 | nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 1058 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); |
| 1024 | trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); | 1059 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); |
| 1025 | return nr; | 1060 | return nr; |
| 1026 | } | 1061 | } |
| 1027 | 1062 | ||
| @@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, | |||
| 1033 | int nr_to_scan = sc->nr_to_scan; | 1068 | int nr_to_scan = sc->nr_to_scan; |
| 1034 | int ret, nr_shrunk; | 1069 | int ret, nr_shrunk; |
| 1035 | 1070 | ||
| 1036 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 1071 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); |
| 1037 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | 1072 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); |
| 1038 | 1073 | ||
| 1039 | if (!nr_to_scan) | 1074 | if (!nr_to_scan) |
| 1040 | return ret; | 1075 | return ret; |
| 1041 | 1076 | ||
| 1042 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1077 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); |
| 1043 | 1078 | ||
| 1044 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 1079 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); |
| 1045 | return nr_shrunk; | 1080 | return nr_shrunk; |
| 1046 | } | 1081 | } |
| 1047 | 1082 | ||
| 1048 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) | 1083 | static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos) |
| 1049 | { | 1084 | { |
| 1085 | return *pos ? NULL : SEQ_START_TOKEN; | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | static void * | ||
| 1089 | ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos) | ||
| 1090 | { | ||
| 1091 | return NULL; | ||
| 1092 | } | ||
| 1093 | |||
| 1094 | static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | ||
| 1095 | { | ||
| 1096 | struct ext4_sb_info *sbi = seq->private; | ||
| 1097 | struct ext4_es_stats *es_stats = &sbi->s_es_stats; | ||
| 1098 | struct ext4_inode_info *ei, *max = NULL; | ||
| 1099 | unsigned int inode_cnt = 0; | ||
| 1100 | |||
| 1101 | if (v != SEQ_START_TOKEN) | ||
| 1102 | return 0; | ||
| 1103 | |||
| 1104 | /* here we just find an inode that has the max nr. of objects */ | ||
| 1105 | spin_lock(&sbi->s_es_lru_lock); | ||
| 1106 | list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { | ||
| 1107 | inode_cnt++; | ||
| 1108 | if (max && max->i_es_all_nr < ei->i_es_all_nr) | ||
| 1109 | max = ei; | ||
| 1110 | else if (!max) | ||
| 1111 | max = ei; | ||
| 1112 | } | ||
| 1113 | spin_unlock(&sbi->s_es_lru_lock); | ||
| 1114 | |||
| 1115 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", | ||
| 1116 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), | ||
| 1117 | percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); | ||
| 1118 | seq_printf(seq, " %lu/%lu cache hits/misses\n", | ||
| 1119 | es_stats->es_stats_cache_hits, | ||
| 1120 | es_stats->es_stats_cache_misses); | ||
| 1121 | if (es_stats->es_stats_last_sorted != 0) | ||
| 1122 | seq_printf(seq, " %u ms last sorted interval\n", | ||
| 1123 | jiffies_to_msecs(jiffies - | ||
| 1124 | es_stats->es_stats_last_sorted)); | ||
| 1125 | if (inode_cnt) | ||
| 1126 | seq_printf(seq, " %d inodes on lru list\n", inode_cnt); | ||
| 1127 | |||
| 1128 | seq_printf(seq, "average:\n %llu us scan time\n", | ||
| 1129 | div_u64(es_stats->es_stats_scan_time, 1000)); | ||
| 1130 | seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk); | ||
| 1131 | if (inode_cnt) | ||
| 1132 | seq_printf(seq, | ||
| 1133 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" | ||
| 1134 | " %llu us max scan time\n", | ||
| 1135 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, | ||
| 1136 | div_u64(es_stats->es_stats_max_scan_time, 1000)); | ||
| 1137 | |||
| 1138 | return 0; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v) | ||
| 1142 | { | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | static const struct seq_operations ext4_es_seq_shrinker_info_ops = { | ||
| 1146 | .start = ext4_es_seq_shrinker_info_start, | ||
| 1147 | .next = ext4_es_seq_shrinker_info_next, | ||
| 1148 | .stop = ext4_es_seq_shrinker_info_stop, | ||
| 1149 | .show = ext4_es_seq_shrinker_info_show, | ||
| 1150 | }; | ||
| 1151 | |||
| 1152 | static int | ||
| 1153 | ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file) | ||
| 1154 | { | ||
| 1155 | int ret; | ||
| 1156 | |||
| 1157 | ret = seq_open(file, &ext4_es_seq_shrinker_info_ops); | ||
| 1158 | if (!ret) { | ||
| 1159 | struct seq_file *m = file->private_data; | ||
| 1160 | m->private = PDE_DATA(inode); | ||
| 1161 | } | ||
| 1162 | |||
| 1163 | return ret; | ||
| 1164 | } | ||
| 1165 | |||
| 1166 | static int | ||
| 1167 | ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file) | ||
| 1168 | { | ||
| 1169 | return seq_release(inode, file); | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | static const struct file_operations ext4_es_seq_shrinker_info_fops = { | ||
| 1173 | .owner = THIS_MODULE, | ||
| 1174 | .open = ext4_es_seq_shrinker_info_open, | ||
| 1175 | .read = seq_read, | ||
| 1176 | .llseek = seq_lseek, | ||
| 1177 | .release = ext4_es_seq_shrinker_info_release, | ||
| 1178 | }; | ||
| 1179 | |||
| 1180 | int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | ||
| 1181 | { | ||
| 1182 | int err; | ||
| 1183 | |||
| 1050 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1184 | INIT_LIST_HEAD(&sbi->s_es_lru); |
| 1051 | spin_lock_init(&sbi->s_es_lru_lock); | 1185 | spin_lock_init(&sbi->s_es_lru_lock); |
| 1052 | sbi->s_es_last_sorted = 0; | 1186 | sbi->s_es_stats.es_stats_last_sorted = 0; |
| 1187 | sbi->s_es_stats.es_stats_shrunk = 0; | ||
| 1188 | sbi->s_es_stats.es_stats_cache_hits = 0; | ||
| 1189 | sbi->s_es_stats.es_stats_cache_misses = 0; | ||
| 1190 | sbi->s_es_stats.es_stats_scan_time = 0; | ||
| 1191 | sbi->s_es_stats.es_stats_max_scan_time = 0; | ||
| 1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); | ||
| 1193 | if (err) | ||
| 1194 | return err; | ||
| 1195 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); | ||
| 1196 | if (err) | ||
| 1197 | goto err1; | ||
| 1198 | |||
| 1053 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; | 1199 | sbi->s_es_shrinker.scan_objects = ext4_es_scan; |
| 1054 | sbi->s_es_shrinker.count_objects = ext4_es_count; | 1200 | sbi->s_es_shrinker.count_objects = ext4_es_count; |
| 1055 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 1201 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
| 1056 | register_shrinker(&sbi->s_es_shrinker); | 1202 | err = register_shrinker(&sbi->s_es_shrinker); |
| 1203 | if (err) | ||
| 1204 | goto err2; | ||
| 1205 | |||
| 1206 | if (sbi->s_proc) | ||
| 1207 | proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc, | ||
| 1208 | &ext4_es_seq_shrinker_info_fops, sbi); | ||
| 1209 | |||
| 1210 | return 0; | ||
| 1211 | |||
| 1212 | err2: | ||
| 1213 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | ||
| 1214 | err1: | ||
| 1215 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | ||
| 1216 | return err; | ||
| 1057 | } | 1217 | } |
| 1058 | 1218 | ||
| 1059 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | 1219 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
| 1060 | { | 1220 | { |
| 1221 | if (sbi->s_proc) | ||
| 1222 | remove_proc_entry("es_shrinker_info", sbi->s_proc); | ||
| 1223 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | ||
| 1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | ||
| 1061 | unregister_shrinker(&sbi->s_es_shrinker); | 1225 | unregister_shrinker(&sbi->s_es_shrinker); |
| 1062 | } | 1226 | } |
| 1063 | 1227 | ||
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f1b62a419920..efd5f970b501 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
| @@ -64,6 +64,17 @@ struct ext4_es_tree { | |||
| 64 | struct extent_status *cache_es; /* recently accessed extent */ | 64 | struct extent_status *cache_es; /* recently accessed extent */ |
| 65 | }; | 65 | }; |
| 66 | 66 | ||
| 67 | struct ext4_es_stats { | ||
| 68 | unsigned long es_stats_last_sorted; | ||
| 69 | unsigned long es_stats_shrunk; | ||
| 70 | unsigned long es_stats_cache_hits; | ||
| 71 | unsigned long es_stats_cache_misses; | ||
| 72 | u64 es_stats_scan_time; | ||
| 73 | u64 es_stats_max_scan_time; | ||
| 74 | struct percpu_counter es_stats_all_cnt; | ||
| 75 | struct percpu_counter es_stats_lru_cnt; | ||
| 76 | }; | ||
| 77 | |||
| 67 | extern int __init ext4_init_es(void); | 78 | extern int __init ext4_init_es(void); |
| 68 | extern void ext4_exit_es(void); | 79 | extern void ext4_exit_es(void); |
| 69 | extern void ext4_es_init_tree(struct ext4_es_tree *tree); | 80 | extern void ext4_es_init_tree(struct ext4_es_tree *tree); |
| @@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, | |||
| 138 | (pb & ~ES_MASK)); | 149 | (pb & ~ES_MASK)); |
| 139 | } | 150 | } |
| 140 | 151 | ||
| 141 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 152 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
| 142 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 153 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
| 143 | extern void ext4_es_lru_add(struct inode *inode); | 154 | extern void ext4_es_lru_add(struct inode *inode); |
| 144 | extern void ext4_es_lru_del(struct inode *inode); | 155 | extern void ext4_es_lru_del(struct inode *inode); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index aca7b24a4432..8131be8c0af3 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -137,10 +137,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
| 137 | iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); | 137 | iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | iocb->private = &overwrite; | ||
| 140 | if (o_direct) { | 141 | if (o_direct) { |
| 141 | blk_start_plug(&plug); | 142 | blk_start_plug(&plug); |
| 142 | 143 | ||
| 143 | iocb->private = &overwrite; | ||
| 144 | 144 | ||
| 145 | /* check whether we do a DIO overwrite or not */ | 145 | /* check whether we do a DIO overwrite or not */ |
| 146 | if (ext4_should_dioread_nolock(inode) && !aio_mutex && | 146 | if (ext4_should_dioread_nolock(inode) && !aio_mutex && |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 5b87fc36aab8..ac644c31ca67 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -887,6 +887,10 @@ got: | |||
| 887 | struct buffer_head *block_bitmap_bh; | 887 | struct buffer_head *block_bitmap_bh; |
| 888 | 888 | ||
| 889 | block_bitmap_bh = ext4_read_block_bitmap(sb, group); | 889 | block_bitmap_bh = ext4_read_block_bitmap(sb, group); |
| 890 | if (!block_bitmap_bh) { | ||
| 891 | err = -EIO; | ||
| 892 | goto out; | ||
| 893 | } | ||
| 890 | BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); | 894 | BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); |
| 891 | err = ext4_journal_get_write_access(handle, block_bitmap_bh); | 895 | err = ext4_journal_get_write_access(handle, block_bitmap_bh); |
| 892 | if (err) { | 896 | if (err) { |
| @@ -1011,8 +1015,7 @@ got: | |||
| 1011 | spin_unlock(&sbi->s_next_gen_lock); | 1015 | spin_unlock(&sbi->s_next_gen_lock); |
| 1012 | 1016 | ||
| 1013 | /* Precompute checksum seed for inode metadata */ | 1017 | /* Precompute checksum seed for inode metadata */ |
| 1014 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1018 | if (ext4_has_metadata_csum(sb)) { |
| 1015 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
| 1016 | __u32 csum; | 1019 | __u32 csum; |
| 1017 | __le32 inum = cpu_to_le32(inode->i_ino); | 1020 | __le32 inum = cpu_to_le32(inode->i_ino); |
| 1018 | __le32 gen = cpu_to_le32(inode->i_generation); | 1021 | __le32 gen = cpu_to_le32(inode->i_generation); |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index e75f840000a0..36b369697a13 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
| @@ -318,34 +318,24 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | |||
| 318 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain | 318 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain |
| 319 | * as described above and return 0. | 319 | * as described above and return 0. |
| 320 | */ | 320 | */ |
| 321 | static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | 321 | static int ext4_alloc_branch(handle_t *handle, |
| 322 | ext4_lblk_t iblock, int indirect_blks, | 322 | struct ext4_allocation_request *ar, |
| 323 | int *blks, ext4_fsblk_t goal, | 323 | int indirect_blks, ext4_lblk_t *offsets, |
| 324 | ext4_lblk_t *offsets, Indirect *branch) | 324 | Indirect *branch) |
| 325 | { | 325 | { |
| 326 | struct ext4_allocation_request ar; | ||
| 327 | struct buffer_head * bh; | 326 | struct buffer_head * bh; |
| 328 | ext4_fsblk_t b, new_blocks[4]; | 327 | ext4_fsblk_t b, new_blocks[4]; |
| 329 | __le32 *p; | 328 | __le32 *p; |
| 330 | int i, j, err, len = 1; | 329 | int i, j, err, len = 1; |
| 331 | 330 | ||
| 332 | /* | ||
| 333 | * Set up for the direct block allocation | ||
| 334 | */ | ||
| 335 | memset(&ar, 0, sizeof(ar)); | ||
| 336 | ar.inode = inode; | ||
| 337 | ar.len = *blks; | ||
| 338 | ar.logical = iblock; | ||
| 339 | if (S_ISREG(inode->i_mode)) | ||
| 340 | ar.flags = EXT4_MB_HINT_DATA; | ||
| 341 | |||
| 342 | for (i = 0; i <= indirect_blks; i++) { | 331 | for (i = 0; i <= indirect_blks; i++) { |
| 343 | if (i == indirect_blks) { | 332 | if (i == indirect_blks) { |
| 344 | ar.goal = goal; | 333 | new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); |
| 345 | new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err); | ||
| 346 | } else | 334 | } else |
| 347 | goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode, | 335 | ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, |
| 348 | goal, 0, NULL, &err); | 336 | ar->inode, ar->goal, |
| 337 | ar->flags & EXT4_MB_DELALLOC_RESERVED, | ||
| 338 | NULL, &err); | ||
| 349 | if (err) { | 339 | if (err) { |
| 350 | i--; | 340 | i--; |
| 351 | goto failed; | 341 | goto failed; |
| @@ -354,7 +344,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 354 | if (i == 0) | 344 | if (i == 0) |
| 355 | continue; | 345 | continue; |
| 356 | 346 | ||
| 357 | bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]); | 347 | bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); |
| 358 | if (unlikely(!bh)) { | 348 | if (unlikely(!bh)) { |
| 359 | err = -ENOMEM; | 349 | err = -ENOMEM; |
| 360 | goto failed; | 350 | goto failed; |
| @@ -372,7 +362,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 372 | b = new_blocks[i]; | 362 | b = new_blocks[i]; |
| 373 | 363 | ||
| 374 | if (i == indirect_blks) | 364 | if (i == indirect_blks) |
| 375 | len = ar.len; | 365 | len = ar->len; |
| 376 | for (j = 0; j < len; j++) | 366 | for (j = 0; j < len; j++) |
| 377 | *p++ = cpu_to_le32(b++); | 367 | *p++ = cpu_to_le32(b++); |
| 378 | 368 | ||
| @@ -381,11 +371,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 381 | unlock_buffer(bh); | 371 | unlock_buffer(bh); |
| 382 | 372 | ||
| 383 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 373 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
| 384 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 374 | err = ext4_handle_dirty_metadata(handle, ar->inode, bh); |
| 385 | if (err) | 375 | if (err) |
| 386 | goto failed; | 376 | goto failed; |
| 387 | } | 377 | } |
| 388 | *blks = ar.len; | ||
| 389 | return 0; | 378 | return 0; |
| 390 | failed: | 379 | failed: |
| 391 | for (; i >= 0; i--) { | 380 | for (; i >= 0; i--) { |
| @@ -396,10 +385,10 @@ failed: | |||
| 396 | * existing before ext4_alloc_branch() was called. | 385 | * existing before ext4_alloc_branch() was called. |
| 397 | */ | 386 | */ |
| 398 | if (i > 0 && i != indirect_blks && branch[i].bh) | 387 | if (i > 0 && i != indirect_blks && branch[i].bh) |
| 399 | ext4_forget(handle, 1, inode, branch[i].bh, | 388 | ext4_forget(handle, 1, ar->inode, branch[i].bh, |
| 400 | branch[i].bh->b_blocknr); | 389 | branch[i].bh->b_blocknr); |
| 401 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], | 390 | ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], |
| 402 | (i == indirect_blks) ? ar.len : 1, 0); | 391 | (i == indirect_blks) ? ar->len : 1, 0); |
| 403 | } | 392 | } |
| 404 | return err; | 393 | return err; |
| 405 | } | 394 | } |
| @@ -419,9 +408,9 @@ failed: | |||
| 419 | * inode (->i_blocks, etc.). In case of success we end up with the full | 408 | * inode (->i_blocks, etc.). In case of success we end up with the full |
| 420 | * chain to new block and return 0. | 409 | * chain to new block and return 0. |
| 421 | */ | 410 | */ |
| 422 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | 411 | static int ext4_splice_branch(handle_t *handle, |
| 423 | ext4_lblk_t block, Indirect *where, int num, | 412 | struct ext4_allocation_request *ar, |
| 424 | int blks) | 413 | Indirect *where, int num) |
| 425 | { | 414 | { |
| 426 | int i; | 415 | int i; |
| 427 | int err = 0; | 416 | int err = 0; |
| @@ -446,9 +435,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
| 446 | * Update the host buffer_head or inode to point to more just allocated | 435 | * Update the host buffer_head or inode to point to more just allocated |
| 447 | * direct blocks blocks | 436 | * direct blocks blocks |
| 448 | */ | 437 | */ |
| 449 | if (num == 0 && blks > 1) { | 438 | if (num == 0 && ar->len > 1) { |
| 450 | current_block = le32_to_cpu(where->key) + 1; | 439 | current_block = le32_to_cpu(where->key) + 1; |
| 451 | for (i = 1; i < blks; i++) | 440 | for (i = 1; i < ar->len; i++) |
| 452 | *(where->p + i) = cpu_to_le32(current_block++); | 441 | *(where->p + i) = cpu_to_le32(current_block++); |
| 453 | } | 442 | } |
| 454 | 443 | ||
| @@ -465,14 +454,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
| 465 | */ | 454 | */ |
| 466 | jbd_debug(5, "splicing indirect only\n"); | 455 | jbd_debug(5, "splicing indirect only\n"); |
| 467 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); | 456 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); |
| 468 | err = ext4_handle_dirty_metadata(handle, inode, where->bh); | 457 | err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); |
| 469 | if (err) | 458 | if (err) |
| 470 | goto err_out; | 459 | goto err_out; |
| 471 | } else { | 460 | } else { |
| 472 | /* | 461 | /* |
| 473 | * OK, we spliced it into the inode itself on a direct block. | 462 | * OK, we spliced it into the inode itself on a direct block. |
| 474 | */ | 463 | */ |
| 475 | ext4_mark_inode_dirty(handle, inode); | 464 | ext4_mark_inode_dirty(handle, ar->inode); |
| 476 | jbd_debug(5, "splicing direct\n"); | 465 | jbd_debug(5, "splicing direct\n"); |
| 477 | } | 466 | } |
| 478 | return err; | 467 | return err; |
| @@ -484,11 +473,11 @@ err_out: | |||
| 484 | * need to revoke the block, which is why we don't | 473 | * need to revoke the block, which is why we don't |
| 485 | * need to set EXT4_FREE_BLOCKS_METADATA. | 474 | * need to set EXT4_FREE_BLOCKS_METADATA. |
| 486 | */ | 475 | */ |
| 487 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | 476 | ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, |
| 488 | EXT4_FREE_BLOCKS_FORGET); | 477 | EXT4_FREE_BLOCKS_FORGET); |
| 489 | } | 478 | } |
| 490 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), | 479 | ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), |
| 491 | blks, 0); | 480 | ar->len, 0); |
| 492 | 481 | ||
| 493 | return err; | 482 | return err; |
| 494 | } | 483 | } |
| @@ -525,11 +514,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
| 525 | struct ext4_map_blocks *map, | 514 | struct ext4_map_blocks *map, |
| 526 | int flags) | 515 | int flags) |
| 527 | { | 516 | { |
| 517 | struct ext4_allocation_request ar; | ||
| 528 | int err = -EIO; | 518 | int err = -EIO; |
| 529 | ext4_lblk_t offsets[4]; | 519 | ext4_lblk_t offsets[4]; |
| 530 | Indirect chain[4]; | 520 | Indirect chain[4]; |
| 531 | Indirect *partial; | 521 | Indirect *partial; |
| 532 | ext4_fsblk_t goal; | ||
| 533 | int indirect_blks; | 522 | int indirect_blks; |
| 534 | int blocks_to_boundary = 0; | 523 | int blocks_to_boundary = 0; |
| 535 | int depth; | 524 | int depth; |
| @@ -579,7 +568,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
| 579 | return -ENOSPC; | 568 | return -ENOSPC; |
| 580 | } | 569 | } |
| 581 | 570 | ||
| 582 | goal = ext4_find_goal(inode, map->m_lblk, partial); | 571 | /* Set up for the direct block allocation */ |
| 572 | memset(&ar, 0, sizeof(ar)); | ||
| 573 | ar.inode = inode; | ||
| 574 | ar.logical = map->m_lblk; | ||
| 575 | if (S_ISREG(inode->i_mode)) | ||
| 576 | ar.flags = EXT4_MB_HINT_DATA; | ||
| 577 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
| 578 | ar.flags |= EXT4_MB_DELALLOC_RESERVED; | ||
| 579 | |||
| 580 | ar.goal = ext4_find_goal(inode, map->m_lblk, partial); | ||
| 583 | 581 | ||
| 584 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 582 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
| 585 | indirect_blks = (chain + depth) - partial - 1; | 583 | indirect_blks = (chain + depth) - partial - 1; |
| @@ -588,13 +586,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
| 588 | * Next look up the indirect map to count the totoal number of | 586 | * Next look up the indirect map to count the totoal number of |
| 589 | * direct blocks to allocate for this branch. | 587 | * direct blocks to allocate for this branch. |
| 590 | */ | 588 | */ |
| 591 | count = ext4_blks_to_allocate(partial, indirect_blks, | 589 | ar.len = ext4_blks_to_allocate(partial, indirect_blks, |
| 592 | map->m_len, blocks_to_boundary); | 590 | map->m_len, blocks_to_boundary); |
| 591 | |||
| 593 | /* | 592 | /* |
| 594 | * Block out ext4_truncate while we alter the tree | 593 | * Block out ext4_truncate while we alter the tree |
| 595 | */ | 594 | */ |
| 596 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, | 595 | err = ext4_alloc_branch(handle, &ar, indirect_blks, |
| 597 | &count, goal, | ||
| 598 | offsets + (partial - chain), partial); | 596 | offsets + (partial - chain), partial); |
| 599 | 597 | ||
| 600 | /* | 598 | /* |
| @@ -605,14 +603,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
| 605 | * may need to return -EAGAIN upwards in the worst case. --sct | 603 | * may need to return -EAGAIN upwards in the worst case. --sct |
| 606 | */ | 604 | */ |
| 607 | if (!err) | 605 | if (!err) |
| 608 | err = ext4_splice_branch(handle, inode, map->m_lblk, | 606 | err = ext4_splice_branch(handle, &ar, partial, indirect_blks); |
| 609 | partial, indirect_blks, count); | ||
| 610 | if (err) | 607 | if (err) |
| 611 | goto cleanup; | 608 | goto cleanup; |
| 612 | 609 | ||
| 613 | map->m_flags |= EXT4_MAP_NEW; | 610 | map->m_flags |= EXT4_MAP_NEW; |
| 614 | 611 | ||
| 615 | ext4_update_inode_fsync_trans(handle, inode, 1); | 612 | ext4_update_inode_fsync_trans(handle, inode, 1); |
| 613 | count = ar.len; | ||
| 616 | got_it: | 614 | got_it: |
| 617 | map->m_flags |= EXT4_MAP_MAPPED; | 615 | map->m_flags |= EXT4_MAP_MAPPED; |
| 618 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | 616 | map->m_pblk = le32_to_cpu(chain[depth-1].key); |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index bea662bd0ca6..3ea62695abce 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
| @@ -594,6 +594,7 @@ retry: | |||
| 594 | if (ret) { | 594 | if (ret) { |
| 595 | unlock_page(page); | 595 | unlock_page(page); |
| 596 | page_cache_release(page); | 596 | page_cache_release(page); |
| 597 | page = NULL; | ||
| 597 | ext4_orphan_add(handle, inode); | 598 | ext4_orphan_add(handle, inode); |
| 598 | up_write(&EXT4_I(inode)->xattr_sem); | 599 | up_write(&EXT4_I(inode)->xattr_sem); |
| 599 | sem_held = 0; | 600 | sem_held = 0; |
| @@ -613,7 +614,8 @@ retry: | |||
| 613 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 614 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
| 614 | goto retry; | 615 | goto retry; |
| 615 | 616 | ||
| 616 | block_commit_write(page, from, to); | 617 | if (page) |
| 618 | block_commit_write(page, from, to); | ||
| 617 | out: | 619 | out: |
| 618 | if (page) { | 620 | if (page) { |
| 619 | unlock_page(page); | 621 | unlock_page(page); |
| @@ -1126,8 +1128,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, | |||
| 1126 | memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, | 1128 | memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, |
| 1127 | inline_size - EXT4_INLINE_DOTDOT_SIZE); | 1129 | inline_size - EXT4_INLINE_DOTDOT_SIZE); |
| 1128 | 1130 | ||
| 1129 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1131 | if (ext4_has_metadata_csum(inode->i_sb)) |
| 1130 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 1131 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1132 | csum_size = sizeof(struct ext4_dir_entry_tail); |
| 1132 | 1133 | ||
| 1133 | inode->i_size = inode->i_sb->s_blocksize; | 1134 | inode->i_size = inode->i_sb->s_blocksize; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3aa26e9117c4..3356ab5395f4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, | |||
| 83 | 83 | ||
| 84 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 84 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
| 85 | cpu_to_le32(EXT4_OS_LINUX) || | 85 | cpu_to_le32(EXT4_OS_LINUX) || |
| 86 | !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 86 | !ext4_has_metadata_csum(inode->i_sb)) |
| 87 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 88 | return 1; | 87 | return 1; |
| 89 | 88 | ||
| 90 | provided = le16_to_cpu(raw->i_checksum_lo); | 89 | provided = le16_to_cpu(raw->i_checksum_lo); |
| @@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, | |||
| 105 | 104 | ||
| 106 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 105 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
| 107 | cpu_to_le32(EXT4_OS_LINUX) || | 106 | cpu_to_le32(EXT4_OS_LINUX) || |
| 108 | !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 107 | !ext4_has_metadata_csum(inode->i_sb)) |
| 109 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 110 | return; | 108 | return; |
| 111 | 109 | ||
| 112 | csum = ext4_inode_csum(inode, raw, ei); | 110 | csum = ext4_inode_csum(inode, raw, ei); |
| @@ -224,16 +222,15 @@ void ext4_evict_inode(struct inode *inode) | |||
| 224 | goto no_delete; | 222 | goto no_delete; |
| 225 | } | 223 | } |
| 226 | 224 | ||
| 227 | if (!is_bad_inode(inode)) | 225 | if (is_bad_inode(inode)) |
| 228 | dquot_initialize(inode); | 226 | goto no_delete; |
| 227 | dquot_initialize(inode); | ||
| 229 | 228 | ||
| 230 | if (ext4_should_order_data(inode)) | 229 | if (ext4_should_order_data(inode)) |
| 231 | ext4_begin_ordered_truncate(inode, 0); | 230 | ext4_begin_ordered_truncate(inode, 0); |
| 232 | truncate_inode_pages_final(&inode->i_data); | 231 | truncate_inode_pages_final(&inode->i_data); |
| 233 | 232 | ||
| 234 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 233 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
| 235 | if (is_bad_inode(inode)) | ||
| 236 | goto no_delete; | ||
| 237 | 234 | ||
| 238 | /* | 235 | /* |
| 239 | * Protect us against freezing - iput() caller didn't have to have any | 236 | * Protect us against freezing - iput() caller didn't have to have any |
| @@ -590,20 +587,12 @@ found: | |||
| 590 | /* | 587 | /* |
| 591 | * New blocks allocate and/or writing to unwritten extent | 588 | * New blocks allocate and/or writing to unwritten extent |
| 592 | * will possibly result in updating i_data, so we take | 589 | * will possibly result in updating i_data, so we take |
| 593 | * the write lock of i_data_sem, and call get_blocks() | 590 | * the write lock of i_data_sem, and call get_block() |
| 594 | * with create == 1 flag. | 591 | * with create == 1 flag. |
| 595 | */ | 592 | */ |
| 596 | down_write(&EXT4_I(inode)->i_data_sem); | 593 | down_write(&EXT4_I(inode)->i_data_sem); |
| 597 | 594 | ||
| 598 | /* | 595 | /* |
| 599 | * if the caller is from delayed allocation writeout path | ||
| 600 | * we have already reserved fs blocks for allocation | ||
| 601 | * let the underlying get_block() function know to | ||
| 602 | * avoid double accounting | ||
| 603 | */ | ||
| 604 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
| 605 | ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | ||
| 606 | /* | ||
| 607 | * We need to check for EXT4 here because migrate | 596 | * We need to check for EXT4 here because migrate |
| 608 | * could have changed the inode type in between | 597 | * could have changed the inode type in between |
| 609 | */ | 598 | */ |
| @@ -631,8 +620,6 @@ found: | |||
| 631 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 620 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
| 632 | ext4_da_update_reserve_space(inode, retval, 1); | 621 | ext4_da_update_reserve_space(inode, retval, 1); |
| 633 | } | 622 | } |
| 634 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | ||
| 635 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | ||
| 636 | 623 | ||
| 637 | if (retval > 0) { | 624 | if (retval > 0) { |
| 638 | unsigned int status; | 625 | unsigned int status; |
| @@ -734,11 +721,11 @@ int ext4_get_block(struct inode *inode, sector_t iblock, | |||
| 734 | * `handle' can be NULL if create is zero | 721 | * `handle' can be NULL if create is zero |
| 735 | */ | 722 | */ |
| 736 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | 723 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, |
| 737 | ext4_lblk_t block, int create, int *errp) | 724 | ext4_lblk_t block, int create) |
| 738 | { | 725 | { |
| 739 | struct ext4_map_blocks map; | 726 | struct ext4_map_blocks map; |
| 740 | struct buffer_head *bh; | 727 | struct buffer_head *bh; |
| 741 | int fatal = 0, err; | 728 | int err; |
| 742 | 729 | ||
| 743 | J_ASSERT(handle != NULL || create == 0); | 730 | J_ASSERT(handle != NULL || create == 0); |
| 744 | 731 | ||
| @@ -747,21 +734,14 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
| 747 | err = ext4_map_blocks(handle, inode, &map, | 734 | err = ext4_map_blocks(handle, inode, &map, |
| 748 | create ? EXT4_GET_BLOCKS_CREATE : 0); | 735 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
| 749 | 736 | ||
| 750 | /* ensure we send some value back into *errp */ | 737 | if (err == 0) |
| 751 | *errp = 0; | 738 | return create ? ERR_PTR(-ENOSPC) : NULL; |
| 752 | |||
| 753 | if (create && err == 0) | ||
| 754 | err = -ENOSPC; /* should never happen */ | ||
| 755 | if (err < 0) | 739 | if (err < 0) |
| 756 | *errp = err; | 740 | return ERR_PTR(err); |
| 757 | if (err <= 0) | ||
| 758 | return NULL; | ||
| 759 | 741 | ||
| 760 | bh = sb_getblk(inode->i_sb, map.m_pblk); | 742 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
| 761 | if (unlikely(!bh)) { | 743 | if (unlikely(!bh)) |
| 762 | *errp = -ENOMEM; | 744 | return ERR_PTR(-ENOMEM); |
| 763 | return NULL; | ||
| 764 | } | ||
| 765 | if (map.m_flags & EXT4_MAP_NEW) { | 745 | if (map.m_flags & EXT4_MAP_NEW) { |
| 766 | J_ASSERT(create != 0); | 746 | J_ASSERT(create != 0); |
| 767 | J_ASSERT(handle != NULL); | 747 | J_ASSERT(handle != NULL); |
| @@ -775,44 +755,44 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
| 775 | */ | 755 | */ |
| 776 | lock_buffer(bh); | 756 | lock_buffer(bh); |
| 777 | BUFFER_TRACE(bh, "call get_create_access"); | 757 | BUFFER_TRACE(bh, "call get_create_access"); |
| 778 | fatal = ext4_journal_get_create_access(handle, bh); | 758 | err = ext4_journal_get_create_access(handle, bh); |
| 779 | if (!fatal && !buffer_uptodate(bh)) { | 759 | if (unlikely(err)) { |
| 760 | unlock_buffer(bh); | ||
| 761 | goto errout; | ||
| 762 | } | ||
| 763 | if (!buffer_uptodate(bh)) { | ||
| 780 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); | 764 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
| 781 | set_buffer_uptodate(bh); | 765 | set_buffer_uptodate(bh); |
| 782 | } | 766 | } |
| 783 | unlock_buffer(bh); | 767 | unlock_buffer(bh); |
| 784 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 768 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
| 785 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 769 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
| 786 | if (!fatal) | 770 | if (unlikely(err)) |
| 787 | fatal = err; | 771 | goto errout; |
| 788 | } else { | 772 | } else |
| 789 | BUFFER_TRACE(bh, "not a new buffer"); | 773 | BUFFER_TRACE(bh, "not a new buffer"); |
| 790 | } | ||
| 791 | if (fatal) { | ||
| 792 | *errp = fatal; | ||
| 793 | brelse(bh); | ||
| 794 | bh = NULL; | ||
| 795 | } | ||
| 796 | return bh; | 774 | return bh; |
| 775 | errout: | ||
| 776 | brelse(bh); | ||
| 777 | return ERR_PTR(err); | ||
| 797 | } | 778 | } |
| 798 | 779 | ||
| 799 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 780 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
| 800 | ext4_lblk_t block, int create, int *err) | 781 | ext4_lblk_t block, int create) |
| 801 | { | 782 | { |
| 802 | struct buffer_head *bh; | 783 | struct buffer_head *bh; |
| 803 | 784 | ||
| 804 | bh = ext4_getblk(handle, inode, block, create, err); | 785 | bh = ext4_getblk(handle, inode, block, create); |
| 805 | if (!bh) | 786 | if (IS_ERR(bh)) |
| 806 | return bh; | 787 | return bh; |
| 807 | if (buffer_uptodate(bh)) | 788 | if (!bh || buffer_uptodate(bh)) |
| 808 | return bh; | 789 | return bh; |
| 809 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); | 790 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
| 810 | wait_on_buffer(bh); | 791 | wait_on_buffer(bh); |
| 811 | if (buffer_uptodate(bh)) | 792 | if (buffer_uptodate(bh)) |
| 812 | return bh; | 793 | return bh; |
| 813 | put_bh(bh); | 794 | put_bh(bh); |
| 814 | *err = -EIO; | 795 | return ERR_PTR(-EIO); |
| 815 | return NULL; | ||
| 816 | } | 796 | } |
| 817 | 797 | ||
| 818 | int ext4_walk_page_buffers(handle_t *handle, | 798 | int ext4_walk_page_buffers(handle_t *handle, |
| @@ -1536,7 +1516,7 @@ out_unlock: | |||
| 1536 | } | 1516 | } |
| 1537 | 1517 | ||
| 1538 | /* | 1518 | /* |
| 1539 | * This is a special get_blocks_t callback which is used by | 1519 | * This is a special get_block_t callback which is used by |
| 1540 | * ext4_da_write_begin(). It will either return mapped block or | 1520 | * ext4_da_write_begin(). It will either return mapped block or |
| 1541 | * reserve space for a single block. | 1521 | * reserve space for a single block. |
| 1542 | * | 1522 | * |
| @@ -2011,12 +1991,10 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | |||
| 2011 | * in data loss. So use reserved blocks to allocate metadata if | 1991 | * in data loss. So use reserved blocks to allocate metadata if |
| 2012 | * possible. | 1992 | * possible. |
| 2013 | * | 1993 | * |
| 2014 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks | 1994 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if |
| 2015 | * in question are delalloc blocks. This affects functions in many | 1995 | * the blocks in question are delalloc blocks. This indicates |
| 2016 | * different parts of the allocation call path. This flag exists | 1996 | * that the blocks and quotas has already been checked when |
| 2017 | * primarily because we don't want to change *many* call functions, so | 1997 | * the data was copied into the page cache. |
| 2018 | * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag | ||
| 2019 | * once the inode's allocation semaphore is taken. | ||
| 2020 | */ | 1998 | */ |
| 2021 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | 1999 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
| 2022 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | 2000 | EXT4_GET_BLOCKS_METADATA_NOFAIL; |
| @@ -2515,6 +2493,20 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
| 2515 | return 0; | 2493 | return 0; |
| 2516 | } | 2494 | } |
| 2517 | 2495 | ||
| 2496 | /* We always reserve for an inode update; the superblock could be there too */ | ||
| 2497 | static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) | ||
| 2498 | { | ||
| 2499 | if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
| 2500 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) | ||
| 2501 | return 1; | ||
| 2502 | |||
| 2503 | if (pos + len <= 0x7fffffffULL) | ||
| 2504 | return 1; | ||
| 2505 | |||
| 2506 | /* We might need to update the superblock to set LARGE_FILE */ | ||
| 2507 | return 2; | ||
| 2508 | } | ||
| 2509 | |||
| 2518 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2510 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
| 2519 | loff_t pos, unsigned len, unsigned flags, | 2511 | loff_t pos, unsigned len, unsigned flags, |
| 2520 | struct page **pagep, void **fsdata) | 2512 | struct page **pagep, void **fsdata) |
| @@ -2565,7 +2557,8 @@ retry_grab: | |||
| 2565 | * of file which has an already mapped buffer. | 2557 | * of file which has an already mapped buffer. |
| 2566 | */ | 2558 | */ |
| 2567 | retry_journal: | 2559 | retry_journal: |
| 2568 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1); | 2560 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, |
| 2561 | ext4_da_write_credits(inode, pos, len)); | ||
| 2569 | if (IS_ERR(handle)) { | 2562 | if (IS_ERR(handle)) { |
| 2570 | page_cache_release(page); | 2563 | page_cache_release(page); |
| 2571 | return PTR_ERR(handle); | 2564 | return PTR_ERR(handle); |
| @@ -2658,10 +2651,7 @@ static int ext4_da_write_end(struct file *file, | |||
| 2658 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { | 2651 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { |
| 2659 | if (ext4_has_inline_data(inode) || | 2652 | if (ext4_has_inline_data(inode) || |
| 2660 | ext4_da_should_update_i_disksize(page, end)) { | 2653 | ext4_da_should_update_i_disksize(page, end)) { |
| 2661 | down_write(&EXT4_I(inode)->i_data_sem); | 2654 | ext4_update_i_disksize(inode, new_i_size); |
| 2662 | if (new_i_size > EXT4_I(inode)->i_disksize) | ||
| 2663 | EXT4_I(inode)->i_disksize = new_i_size; | ||
| 2664 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2665 | /* We need to mark inode dirty even if | 2655 | /* We need to mark inode dirty even if |
| 2666 | * new_i_size is less that inode->i_size | 2656 | * new_i_size is less that inode->i_size |
| 2667 | * bu greater than i_disksize.(hint delalloc) | 2657 | * bu greater than i_disksize.(hint delalloc) |
| @@ -3936,8 +3926,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 3936 | ei->i_extra_isize = 0; | 3926 | ei->i_extra_isize = 0; |
| 3937 | 3927 | ||
| 3938 | /* Precompute checksum seed for inode metadata */ | 3928 | /* Precompute checksum seed for inode metadata */ |
| 3939 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3929 | if (ext4_has_metadata_csum(sb)) { |
| 3940 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
| 3941 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 3930 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 3942 | __u32 csum; | 3931 | __u32 csum; |
| 3943 | __le32 inum = cpu_to_le32(inode->i_ino); | 3932 | __le32 inum = cpu_to_le32(inode->i_ino); |
| @@ -4127,6 +4116,13 @@ bad_inode: | |||
| 4127 | return ERR_PTR(ret); | 4116 | return ERR_PTR(ret); |
| 4128 | } | 4117 | } |
| 4129 | 4118 | ||
| 4119 | struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) | ||
| 4120 | { | ||
| 4121 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) | ||
| 4122 | return ERR_PTR(-EIO); | ||
| 4123 | return ext4_iget(sb, ino); | ||
| 4124 | } | ||
| 4125 | |||
| 4130 | static int ext4_inode_blocks_set(handle_t *handle, | 4126 | static int ext4_inode_blocks_set(handle_t *handle, |
| 4131 | struct ext4_inode *raw_inode, | 4127 | struct ext4_inode *raw_inode, |
| 4132 | struct ext4_inode_info *ei) | 4128 | struct ext4_inode_info *ei) |
| @@ -4226,7 +4222,8 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 4226 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); | 4222 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); |
| 4227 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); | 4223 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); |
| 4228 | 4224 | ||
| 4229 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) { | 4225 | err = ext4_inode_blocks_set(handle, raw_inode, ei); |
| 4226 | if (err) { | ||
| 4230 | spin_unlock(&ei->i_raw_lock); | 4227 | spin_unlock(&ei->i_raw_lock); |
| 4231 | goto out_brelse; | 4228 | goto out_brelse; |
| 4232 | } | 4229 | } |
| @@ -4536,8 +4533,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 4536 | ext4_orphan_del(NULL, inode); | 4533 | ext4_orphan_del(NULL, inode); |
| 4537 | goto err_out; | 4534 | goto err_out; |
| 4538 | } | 4535 | } |
| 4539 | } else | 4536 | } else { |
| 4537 | loff_t oldsize = inode->i_size; | ||
| 4538 | |||
| 4540 | i_size_write(inode, attr->ia_size); | 4539 | i_size_write(inode, attr->ia_size); |
| 4540 | pagecache_isize_extended(inode, oldsize, inode->i_size); | ||
| 4541 | } | ||
| 4541 | 4542 | ||
| 4542 | /* | 4543 | /* |
| 4543 | * Blocks are going to be removed from the inode. Wait | 4544 | * Blocks are going to be removed from the inode. Wait |
| @@ -4958,7 +4959,12 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 4958 | if (val) | 4959 | if (val) |
| 4959 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4960 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
| 4960 | else { | 4961 | else { |
| 4961 | jbd2_journal_flush(journal); | 4962 | err = jbd2_journal_flush(journal); |
| 4963 | if (err < 0) { | ||
| 4964 | jbd2_journal_unlock_updates(journal); | ||
| 4965 | ext4_inode_resume_unlocked_dio(inode); | ||
| 4966 | return err; | ||
| 4967 | } | ||
| 4962 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4968 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
| 4963 | } | 4969 | } |
| 4964 | ext4_set_aops(inode); | 4970 | ext4_set_aops(inode); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 0f2252ec274d..bfda18a15592 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
| @@ -331,8 +331,7 @@ flags_out: | |||
| 331 | if (!inode_owner_or_capable(inode)) | 331 | if (!inode_owner_or_capable(inode)) |
| 332 | return -EPERM; | 332 | return -EPERM; |
| 333 | 333 | ||
| 334 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 334 | if (ext4_has_metadata_csum(inode->i_sb)) { |
| 335 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
| 336 | ext4_warning(sb, "Setting inode version is not " | 335 | ext4_warning(sb, "Setting inode version is not " |
| 337 | "supported with metadata_csum enabled."); | 336 | "supported with metadata_csum enabled."); |
| 338 | return -ENOTTY; | 337 | return -ENOTTY; |
| @@ -532,9 +531,17 @@ group_add_out: | |||
| 532 | } | 531 | } |
| 533 | 532 | ||
| 534 | case EXT4_IOC_SWAP_BOOT: | 533 | case EXT4_IOC_SWAP_BOOT: |
| 534 | { | ||
| 535 | int err; | ||
| 535 | if (!(filp->f_mode & FMODE_WRITE)) | 536 | if (!(filp->f_mode & FMODE_WRITE)) |
| 536 | return -EBADF; | 537 | return -EBADF; |
| 537 | return swap_inode_boot_loader(sb, inode); | 538 | err = mnt_want_write_file(filp); |
| 539 | if (err) | ||
| 540 | return err; | ||
| 541 | err = swap_inode_boot_loader(sb, inode); | ||
| 542 | mnt_drop_write_file(filp); | ||
| 543 | return err; | ||
| 544 | } | ||
| 538 | 545 | ||
| 539 | case EXT4_IOC_RESIZE_FS: { | 546 | case EXT4_IOC_RESIZE_FS: { |
| 540 | ext4_fsblk_t n_blocks_count; | 547 | ext4_fsblk_t n_blocks_count; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 748c9136a60a..dbfe15c2533c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -3155,9 +3155,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
| 3155 | "start %lu, size %lu, fe_logical %lu", | 3155 | "start %lu, size %lu, fe_logical %lu", |
| 3156 | (unsigned long) start, (unsigned long) size, | 3156 | (unsigned long) start, (unsigned long) size, |
| 3157 | (unsigned long) ac->ac_o_ex.fe_logical); | 3157 | (unsigned long) ac->ac_o_ex.fe_logical); |
| 3158 | BUG(); | ||
| 3158 | } | 3159 | } |
| 3159 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && | ||
| 3160 | start > ac->ac_o_ex.fe_logical); | ||
| 3161 | BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | 3160 | BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); |
| 3162 | 3161 | ||
| 3163 | /* now prepare goal request */ | 3162 | /* now prepare goal request */ |
| @@ -4410,14 +4409,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4410 | if (IS_NOQUOTA(ar->inode)) | 4409 | if (IS_NOQUOTA(ar->inode)) |
| 4411 | ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; | 4410 | ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; |
| 4412 | 4411 | ||
| 4413 | /* | 4412 | if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { |
| 4414 | * For delayed allocation, we could skip the ENOSPC and | ||
| 4415 | * EDQUOT check, as blocks and quotas have been already | ||
| 4416 | * reserved when data being copied into pagecache. | ||
| 4417 | */ | ||
| 4418 | if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) | ||
| 4419 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | ||
| 4420 | else { | ||
| 4421 | /* Without delayed allocation we need to verify | 4413 | /* Without delayed allocation we need to verify |
| 4422 | * there is enough free blocks to do block allocation | 4414 | * there is enough free blocks to do block allocation |
| 4423 | * and verify allocation doesn't exceed the quota limits. | 4415 | * and verify allocation doesn't exceed the quota limits. |
| @@ -4528,8 +4520,7 @@ out: | |||
| 4528 | if (inquota && ar->len < inquota) | 4520 | if (inquota && ar->len < inquota) |
| 4529 | dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); | 4521 | dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); |
| 4530 | if (!ar->len) { | 4522 | if (!ar->len) { |
| 4531 | if (!ext4_test_inode_state(ar->inode, | 4523 | if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) |
| 4532 | EXT4_STATE_DELALLOC_RESERVED)) | ||
| 4533 | /* release all the reserved blocks if non delalloc */ | 4524 | /* release all the reserved blocks if non delalloc */ |
| 4534 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, | 4525 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
| 4535 | reserv_clstrs); | 4526 | reserv_clstrs); |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index d3567f27bae7..a432634f2e6a 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
| @@ -41,8 +41,7 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
| 41 | ext4_ext_store_pblock(&newext, lb->first_pblock); | 41 | ext4_ext_store_pblock(&newext, lb->first_pblock); |
| 42 | /* Locking only for convinience since we are operating on temp inode */ | 42 | /* Locking only for convinience since we are operating on temp inode */ |
| 43 | down_write(&EXT4_I(inode)->i_data_sem); | 43 | down_write(&EXT4_I(inode)->i_data_sem); |
| 44 | path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); | 44 | path = ext4_find_extent(inode, lb->first_block, NULL, 0); |
| 45 | |||
| 46 | if (IS_ERR(path)) { | 45 | if (IS_ERR(path)) { |
| 47 | retval = PTR_ERR(path); | 46 | retval = PTR_ERR(path); |
| 48 | path = NULL; | 47 | path = NULL; |
| @@ -81,13 +80,11 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
| 81 | goto err_out; | 80 | goto err_out; |
| 82 | } | 81 | } |
| 83 | } | 82 | } |
| 84 | retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); | 83 | retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); |
| 85 | err_out: | 84 | err_out: |
| 86 | up_write((&EXT4_I(inode)->i_data_sem)); | 85 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 87 | if (path) { | 86 | ext4_ext_drop_refs(path); |
| 88 | ext4_ext_drop_refs(path); | 87 | kfree(path); |
| 89 | kfree(path); | ||
| 90 | } | ||
| 91 | lb->first_pblock = 0; | 88 | lb->first_pblock = 0; |
| 92 | return retval; | 89 | return retval; |
| 93 | } | 90 | } |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 32bce844c2e1..8313ca3324ec 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
| @@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) | |||
| 20 | 20 | ||
| 21 | static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) | 21 | static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) |
| 22 | { | 22 | { |
| 23 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 23 | if (!ext4_has_metadata_csum(sb)) |
| 24 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 25 | return 1; | 24 | return 1; |
| 26 | 25 | ||
| 27 | return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); | 26 | return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); |
| @@ -29,8 +28,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) | |||
| 29 | 28 | ||
| 30 | static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) | 29 | static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) |
| 31 | { | 30 | { |
| 32 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 31 | if (!ext4_has_metadata_csum(sb)) |
| 33 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 34 | return; | 32 | return; |
| 35 | 33 | ||
| 36 | mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); | 34 | mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 671a74b14fd7..9f2311bc9c4f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
| @@ -27,120 +27,26 @@ | |||
| 27 | * @lblock: logical block number to find an extent path | 27 | * @lblock: logical block number to find an extent path |
| 28 | * @path: pointer to an extent path pointer (for output) | 28 | * @path: pointer to an extent path pointer (for output) |
| 29 | * | 29 | * |
| 30 | * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value | 30 | * ext4_find_extent wrapper. Return 0 on success, or a negative error value |
| 31 | * on failure. | 31 | * on failure. |
| 32 | */ | 32 | */ |
| 33 | static inline int | 33 | static inline int |
| 34 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, | 34 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, |
| 35 | struct ext4_ext_path **orig_path) | 35 | struct ext4_ext_path **ppath) |
| 36 | { | 36 | { |
| 37 | int ret = 0; | ||
| 38 | struct ext4_ext_path *path; | 37 | struct ext4_ext_path *path; |
| 39 | 38 | ||
| 40 | path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE); | 39 | path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE); |
| 41 | if (IS_ERR(path)) | 40 | if (IS_ERR(path)) |
| 42 | ret = PTR_ERR(path); | 41 | return PTR_ERR(path); |
| 43 | else if (path[ext_depth(inode)].p_ext == NULL) | 42 | if (path[ext_depth(inode)].p_ext == NULL) { |
| 44 | ret = -ENODATA; | 43 | ext4_ext_drop_refs(path); |
| 45 | else | 44 | kfree(path); |
| 46 | *orig_path = path; | 45 | *ppath = NULL; |
| 47 | 46 | return -ENODATA; | |
| 48 | return ret; | ||
| 49 | } | ||
| 50 | |||
| 51 | /** | ||
| 52 | * copy_extent_status - Copy the extent's initialization status | ||
| 53 | * | ||
| 54 | * @src: an extent for getting initialize status | ||
| 55 | * @dest: an extent to be set the status | ||
| 56 | */ | ||
| 57 | static void | ||
| 58 | copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) | ||
| 59 | { | ||
| 60 | if (ext4_ext_is_unwritten(src)) | ||
| 61 | ext4_ext_mark_unwritten(dest); | ||
| 62 | else | ||
| 63 | dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); | ||
| 64 | } | ||
| 65 | |||
| 66 | /** | ||
| 67 | * mext_next_extent - Search for the next extent and set it to "extent" | ||
| 68 | * | ||
| 69 | * @inode: inode which is searched | ||
| 70 | * @path: this will obtain data for the next extent | ||
| 71 | * @extent: pointer to the next extent we have just gotten | ||
| 72 | * | ||
| 73 | * Search the next extent in the array of ext4_ext_path structure (@path) | ||
| 74 | * and set it to ext4_extent structure (@extent). In addition, the member of | ||
| 75 | * @path (->p_ext) also points the next extent. Return 0 on success, 1 if | ||
| 76 | * ext4_ext_path structure refers to the last extent, or a negative error | ||
| 77 | * value on failure. | ||
| 78 | */ | ||
| 79 | int | ||
| 80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
| 81 | struct ext4_extent **extent) | ||
| 82 | { | ||
| 83 | struct ext4_extent_header *eh; | ||
| 84 | int ppos, leaf_ppos = path->p_depth; | ||
| 85 | |||
| 86 | ppos = leaf_ppos; | ||
| 87 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | ||
| 88 | /* leaf block */ | ||
| 89 | *extent = ++path[ppos].p_ext; | ||
| 90 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | while (--ppos >= 0) { | ||
| 95 | if (EXT_LAST_INDEX(path[ppos].p_hdr) > | ||
| 96 | path[ppos].p_idx) { | ||
| 97 | int cur_ppos = ppos; | ||
| 98 | |||
| 99 | /* index block */ | ||
| 100 | path[ppos].p_idx++; | ||
| 101 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); | ||
| 102 | if (path[ppos+1].p_bh) | ||
| 103 | brelse(path[ppos+1].p_bh); | ||
| 104 | path[ppos+1].p_bh = | ||
| 105 | sb_bread(inode->i_sb, path[ppos].p_block); | ||
| 106 | if (!path[ppos+1].p_bh) | ||
| 107 | return -EIO; | ||
| 108 | path[ppos+1].p_hdr = | ||
| 109 | ext_block_hdr(path[ppos+1].p_bh); | ||
| 110 | |||
| 111 | /* Halfway index block */ | ||
| 112 | while (++cur_ppos < leaf_ppos) { | ||
| 113 | path[cur_ppos].p_idx = | ||
| 114 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); | ||
| 115 | path[cur_ppos].p_block = | ||
| 116 | ext4_idx_pblock(path[cur_ppos].p_idx); | ||
| 117 | if (path[cur_ppos+1].p_bh) | ||
| 118 | brelse(path[cur_ppos+1].p_bh); | ||
| 119 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, | ||
| 120 | path[cur_ppos].p_block); | ||
| 121 | if (!path[cur_ppos+1].p_bh) | ||
| 122 | return -EIO; | ||
| 123 | path[cur_ppos+1].p_hdr = | ||
| 124 | ext_block_hdr(path[cur_ppos+1].p_bh); | ||
| 125 | } | ||
| 126 | |||
| 127 | path[leaf_ppos].p_ext = *extent = NULL; | ||
| 128 | |||
| 129 | eh = path[leaf_ppos].p_hdr; | ||
| 130 | if (le16_to_cpu(eh->eh_entries) == 0) | ||
| 131 | /* empty leaf is found */ | ||
| 132 | return -ENODATA; | ||
| 133 | |||
| 134 | /* leaf block */ | ||
| 135 | path[leaf_ppos].p_ext = *extent = | ||
| 136 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | ||
| 137 | path[leaf_ppos].p_block = | ||
| 138 | ext4_ext_pblock(path[leaf_ppos].p_ext); | ||
| 139 | return 0; | ||
| 140 | } | ||
| 141 | } | 47 | } |
| 142 | /* We found the last extent */ | 48 | *ppath = path; |
| 143 | return 1; | 49 | return 0; |
| 144 | } | 50 | } |
| 145 | 51 | ||
| 146 | /** | 52 | /** |
| @@ -178,417 +84,6 @@ ext4_double_up_write_data_sem(struct inode *orig_inode, | |||
| 178 | } | 84 | } |
| 179 | 85 | ||
| 180 | /** | 86 | /** |
| 181 | * mext_insert_across_blocks - Insert extents across leaf block | ||
| 182 | * | ||
| 183 | * @handle: journal handle | ||
| 184 | * @orig_inode: original inode | ||
| 185 | * @o_start: first original extent to be changed | ||
| 186 | * @o_end: last original extent to be changed | ||
| 187 | * @start_ext: first new extent to be inserted | ||
| 188 | * @new_ext: middle of new extent to be inserted | ||
| 189 | * @end_ext: last new extent to be inserted | ||
| 190 | * | ||
| 191 | * Allocate a new leaf block and insert extents into it. Return 0 on success, | ||
| 192 | * or a negative error value on failure. | ||
| 193 | */ | ||
| 194 | static int | ||
| 195 | mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | ||
| 196 | struct ext4_extent *o_start, struct ext4_extent *o_end, | ||
| 197 | struct ext4_extent *start_ext, struct ext4_extent *new_ext, | ||
| 198 | struct ext4_extent *end_ext) | ||
| 199 | { | ||
| 200 | struct ext4_ext_path *orig_path = NULL; | ||
| 201 | ext4_lblk_t eblock = 0; | ||
| 202 | int new_flag = 0; | ||
| 203 | int end_flag = 0; | ||
| 204 | int err = 0; | ||
| 205 | |||
| 206 | if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) { | ||
| 207 | if (o_start == o_end) { | ||
| 208 | |||
| 209 | /* start_ext new_ext end_ext | ||
| 210 | * donor |---------|-----------|--------| | ||
| 211 | * orig |------------------------------| | ||
| 212 | */ | ||
| 213 | end_flag = 1; | ||
| 214 | } else { | ||
| 215 | |||
| 216 | /* start_ext new_ext end_ext | ||
| 217 | * donor |---------|----------|---------| | ||
| 218 | * orig |---------------|--------------| | ||
| 219 | */ | ||
| 220 | o_end->ee_block = end_ext->ee_block; | ||
| 221 | o_end->ee_len = end_ext->ee_len; | ||
| 222 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); | ||
| 223 | } | ||
| 224 | |||
| 225 | o_start->ee_len = start_ext->ee_len; | ||
| 226 | eblock = le32_to_cpu(start_ext->ee_block); | ||
| 227 | new_flag = 1; | ||
| 228 | |||
| 229 | } else if (start_ext->ee_len && new_ext->ee_len && | ||
| 230 | !end_ext->ee_len && o_start == o_end) { | ||
| 231 | |||
| 232 | /* start_ext new_ext | ||
| 233 | * donor |--------------|---------------| | ||
| 234 | * orig |------------------------------| | ||
| 235 | */ | ||
| 236 | o_start->ee_len = start_ext->ee_len; | ||
| 237 | eblock = le32_to_cpu(start_ext->ee_block); | ||
| 238 | new_flag = 1; | ||
| 239 | |||
| 240 | } else if (!start_ext->ee_len && new_ext->ee_len && | ||
| 241 | end_ext->ee_len && o_start == o_end) { | ||
| 242 | |||
| 243 | /* new_ext end_ext | ||
| 244 | * donor |--------------|---------------| | ||
| 245 | * orig |------------------------------| | ||
| 246 | */ | ||
| 247 | o_end->ee_block = end_ext->ee_block; | ||
| 248 | o_end->ee_len = end_ext->ee_len; | ||
| 249 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); | ||
| 250 | |||
| 251 | /* | ||
| 252 | * Set 0 to the extent block if new_ext was | ||
| 253 | * the first block. | ||
| 254 | */ | ||
| 255 | if (new_ext->ee_block) | ||
| 256 | eblock = le32_to_cpu(new_ext->ee_block); | ||
| 257 | |||
| 258 | new_flag = 1; | ||
| 259 | } else { | ||
| 260 | ext4_debug("ext4 move extent: Unexpected insert case\n"); | ||
| 261 | return -EIO; | ||
| 262 | } | ||
| 263 | |||
| 264 | if (new_flag) { | ||
| 265 | err = get_ext_path(orig_inode, eblock, &orig_path); | ||
| 266 | if (err) | ||
| 267 | goto out; | ||
| 268 | |||
| 269 | if (ext4_ext_insert_extent(handle, orig_inode, | ||
| 270 | orig_path, new_ext, 0)) | ||
| 271 | goto out; | ||
| 272 | } | ||
| 273 | |||
| 274 | if (end_flag) { | ||
| 275 | err = get_ext_path(orig_inode, | ||
| 276 | le32_to_cpu(end_ext->ee_block) - 1, &orig_path); | ||
| 277 | if (err) | ||
| 278 | goto out; | ||
| 279 | |||
| 280 | if (ext4_ext_insert_extent(handle, orig_inode, | ||
| 281 | orig_path, end_ext, 0)) | ||
| 282 | goto out; | ||
| 283 | } | ||
| 284 | out: | ||
| 285 | if (orig_path) { | ||
| 286 | ext4_ext_drop_refs(orig_path); | ||
| 287 | kfree(orig_path); | ||
| 288 | } | ||
| 289 | |||
| 290 | return err; | ||
| 291 | |||
| 292 | } | ||
| 293 | |||
| 294 | /** | ||
| 295 | * mext_insert_inside_block - Insert new extent to the extent block | ||
| 296 | * | ||
| 297 | * @o_start: first original extent to be moved | ||
| 298 | * @o_end: last original extent to be moved | ||
| 299 | * @start_ext: first new extent to be inserted | ||
| 300 | * @new_ext: middle of new extent to be inserted | ||
| 301 | * @end_ext: last new extent to be inserted | ||
| 302 | * @eh: extent header of target leaf block | ||
| 303 | * @range_to_move: used to decide how to insert extent | ||
| 304 | * | ||
| 305 | * Insert extents into the leaf block. The extent (@o_start) is overwritten | ||
| 306 | * by inserted extents. | ||
| 307 | */ | ||
| 308 | static void | ||
| 309 | mext_insert_inside_block(struct ext4_extent *o_start, | ||
| 310 | struct ext4_extent *o_end, | ||
| 311 | struct ext4_extent *start_ext, | ||
| 312 | struct ext4_extent *new_ext, | ||
| 313 | struct ext4_extent *end_ext, | ||
| 314 | struct ext4_extent_header *eh, | ||
| 315 | int range_to_move) | ||
| 316 | { | ||
| 317 | int i = 0; | ||
| 318 | unsigned long len; | ||
| 319 | |||
| 320 | /* Move the existing extents */ | ||
| 321 | if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) { | ||
| 322 | len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) - | ||
| 323 | (unsigned long)(o_end + 1); | ||
| 324 | memmove(o_end + 1 + range_to_move, o_end + 1, len); | ||
| 325 | } | ||
| 326 | |||
| 327 | /* Insert start entry */ | ||
| 328 | if (start_ext->ee_len) | ||
| 329 | o_start[i++].ee_len = start_ext->ee_len; | ||
| 330 | |||
| 331 | /* Insert new entry */ | ||
| 332 | if (new_ext->ee_len) { | ||
| 333 | o_start[i] = *new_ext; | ||
| 334 | ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); | ||
| 335 | } | ||
| 336 | |||
| 337 | /* Insert end entry */ | ||
| 338 | if (end_ext->ee_len) | ||
| 339 | o_start[i] = *end_ext; | ||
| 340 | |||
| 341 | /* Increment the total entries counter on the extent block */ | ||
| 342 | le16_add_cpu(&eh->eh_entries, range_to_move); | ||
| 343 | } | ||
| 344 | |||
| 345 | /** | ||
| 346 | * mext_insert_extents - Insert new extent | ||
| 347 | * | ||
| 348 | * @handle: journal handle | ||
| 349 | * @orig_inode: original inode | ||
| 350 | * @orig_path: path indicates first extent to be changed | ||
| 351 | * @o_start: first original extent to be changed | ||
| 352 | * @o_end: last original extent to be changed | ||
| 353 | * @start_ext: first new extent to be inserted | ||
| 354 | * @new_ext: middle of new extent to be inserted | ||
| 355 | * @end_ext: last new extent to be inserted | ||
| 356 | * | ||
| 357 | * Call the function to insert extents. If we cannot add more extents into | ||
| 358 | * the leaf block, we call mext_insert_across_blocks() to create a | ||
| 359 | * new leaf block. Otherwise call mext_insert_inside_block(). Return 0 | ||
| 360 | * on success, or a negative error value on failure. | ||
| 361 | */ | ||
| 362 | static int | ||
| 363 | mext_insert_extents(handle_t *handle, struct inode *orig_inode, | ||
| 364 | struct ext4_ext_path *orig_path, | ||
| 365 | struct ext4_extent *o_start, | ||
| 366 | struct ext4_extent *o_end, | ||
| 367 | struct ext4_extent *start_ext, | ||
| 368 | struct ext4_extent *new_ext, | ||
| 369 | struct ext4_extent *end_ext) | ||
| 370 | { | ||
| 371 | struct ext4_extent_header *eh; | ||
| 372 | unsigned long need_slots, slots_range; | ||
| 373 | int range_to_move, depth, ret; | ||
| 374 | |||
| 375 | /* | ||
| 376 | * The extents need to be inserted | ||
| 377 | * start_extent + new_extent + end_extent. | ||
| 378 | */ | ||
| 379 | need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) + | ||
| 380 | (new_ext->ee_len ? 1 : 0); | ||
| 381 | |||
| 382 | /* The number of slots between start and end */ | ||
| 383 | slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1) | ||
| 384 | / sizeof(struct ext4_extent); | ||
| 385 | |||
| 386 | /* Range to move the end of extent */ | ||
| 387 | range_to_move = need_slots - slots_range; | ||
| 388 | depth = orig_path->p_depth; | ||
| 389 | orig_path += depth; | ||
| 390 | eh = orig_path->p_hdr; | ||
| 391 | |||
| 392 | if (depth) { | ||
| 393 | /* Register to journal */ | ||
| 394 | BUFFER_TRACE(orig_path->p_bh, "get_write_access"); | ||
| 395 | ret = ext4_journal_get_write_access(handle, orig_path->p_bh); | ||
| 396 | if (ret) | ||
| 397 | return ret; | ||
| 398 | } | ||
| 399 | |||
| 400 | /* Expansion */ | ||
| 401 | if (range_to_move > 0 && | ||
| 402 | (range_to_move > le16_to_cpu(eh->eh_max) | ||
| 403 | - le16_to_cpu(eh->eh_entries))) { | ||
| 404 | |||
| 405 | ret = mext_insert_across_blocks(handle, orig_inode, o_start, | ||
| 406 | o_end, start_ext, new_ext, end_ext); | ||
| 407 | if (ret < 0) | ||
| 408 | return ret; | ||
| 409 | } else | ||
| 410 | mext_insert_inside_block(o_start, o_end, start_ext, new_ext, | ||
| 411 | end_ext, eh, range_to_move); | ||
| 412 | |||
| 413 | return ext4_ext_dirty(handle, orig_inode, orig_path); | ||
| 414 | } | ||
| 415 | |||
| 416 | /** | ||
| 417 | * mext_leaf_block - Move one leaf extent block into the inode. | ||
| 418 | * | ||
| 419 | * @handle: journal handle | ||
| 420 | * @orig_inode: original inode | ||
| 421 | * @orig_path: path indicates first extent to be changed | ||
| 422 | * @dext: donor extent | ||
| 423 | * @from: start offset on the target file | ||
| 424 | * | ||
| 425 | * In order to insert extents into the leaf block, we must divide the extent | ||
| 426 | * in the leaf block into three extents. The one is located to be inserted | ||
| 427 | * extents, and the others are located around it. | ||
| 428 | * | ||
| 429 | * Therefore, this function creates structures to save extents of the leaf | ||
| 430 | * block, and inserts extents by calling mext_insert_extents() with | ||
| 431 | * created extents. Return 0 on success, or a negative error value on failure. | ||
| 432 | */ | ||
| 433 | static int | ||
| 434 | mext_leaf_block(handle_t *handle, struct inode *orig_inode, | ||
| 435 | struct ext4_ext_path *orig_path, struct ext4_extent *dext, | ||
| 436 | ext4_lblk_t *from) | ||
| 437 | { | ||
| 438 | struct ext4_extent *oext, *o_start, *o_end, *prev_ext; | ||
| 439 | struct ext4_extent new_ext, start_ext, end_ext; | ||
| 440 | ext4_lblk_t new_ext_end; | ||
| 441 | int oext_alen, new_ext_alen, end_ext_alen; | ||
| 442 | int depth = ext_depth(orig_inode); | ||
| 443 | int ret; | ||
| 444 | |||
| 445 | start_ext.ee_block = end_ext.ee_block = 0; | ||
| 446 | o_start = o_end = oext = orig_path[depth].p_ext; | ||
| 447 | oext_alen = ext4_ext_get_actual_len(oext); | ||
| 448 | start_ext.ee_len = end_ext.ee_len = 0; | ||
| 449 | |||
| 450 | new_ext.ee_block = cpu_to_le32(*from); | ||
| 451 | ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); | ||
| 452 | new_ext.ee_len = dext->ee_len; | ||
| 453 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | ||
| 454 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | ||
| 455 | |||
| 456 | /* | ||
| 457 | * Case: original extent is first | ||
| 458 | * oext |--------| | ||
| 459 | * new_ext |--| | ||
| 460 | * start_ext |--| | ||
| 461 | */ | ||
| 462 | if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) && | ||
| 463 | le32_to_cpu(new_ext.ee_block) < | ||
| 464 | le32_to_cpu(oext->ee_block) + oext_alen) { | ||
| 465 | start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - | ||
| 466 | le32_to_cpu(oext->ee_block)); | ||
| 467 | start_ext.ee_block = oext->ee_block; | ||
| 468 | copy_extent_status(oext, &start_ext); | ||
| 469 | } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { | ||
| 470 | prev_ext = oext - 1; | ||
| 471 | /* | ||
| 472 | * We can merge new_ext into previous extent, | ||
| 473 | * if these are contiguous and same extent type. | ||
| 474 | */ | ||
| 475 | if (ext4_can_extents_be_merged(orig_inode, prev_ext, | ||
| 476 | &new_ext)) { | ||
| 477 | o_start = prev_ext; | ||
| 478 | start_ext.ee_len = cpu_to_le16( | ||
| 479 | ext4_ext_get_actual_len(prev_ext) + | ||
| 480 | new_ext_alen); | ||
| 481 | start_ext.ee_block = oext->ee_block; | ||
| 482 | copy_extent_status(prev_ext, &start_ext); | ||
| 483 | new_ext.ee_len = 0; | ||
| 484 | } | ||
| 485 | } | ||
| 486 | |||
| 487 | /* | ||
| 488 | * Case: new_ext_end must be less than oext | ||
| 489 | * oext |-----------| | ||
| 490 | * new_ext |-------| | ||
| 491 | */ | ||
| 492 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { | ||
| 493 | EXT4_ERROR_INODE(orig_inode, | ||
| 494 | "new_ext_end(%u) should be less than or equal to " | ||
| 495 | "oext->ee_block(%u) + oext_alen(%d) - 1", | ||
| 496 | new_ext_end, le32_to_cpu(oext->ee_block), | ||
| 497 | oext_alen); | ||
| 498 | ret = -EIO; | ||
| 499 | goto out; | ||
| 500 | } | ||
| 501 | |||
| 502 | /* | ||
| 503 | * Case: new_ext is smaller than original extent | ||
| 504 | * oext |---------------| | ||
| 505 | * new_ext |-----------| | ||
| 506 | * end_ext |---| | ||
| 507 | */ | ||
| 508 | if (le32_to_cpu(oext->ee_block) <= new_ext_end && | ||
| 509 | new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) { | ||
| 510 | end_ext.ee_len = | ||
| 511 | cpu_to_le16(le32_to_cpu(oext->ee_block) + | ||
| 512 | oext_alen - 1 - new_ext_end); | ||
| 513 | copy_extent_status(oext, &end_ext); | ||
| 514 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); | ||
| 515 | ext4_ext_store_pblock(&end_ext, | ||
| 516 | (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); | ||
| 517 | end_ext.ee_block = | ||
| 518 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + | ||
| 519 | oext_alen - end_ext_alen); | ||
| 520 | } | ||
| 521 | |||
| 522 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, | ||
| 523 | o_end, &start_ext, &new_ext, &end_ext); | ||
| 524 | out: | ||
| 525 | return ret; | ||
| 526 | } | ||
| 527 | |||
| 528 | /** | ||
| 529 | * mext_calc_swap_extents - Calculate extents for extent swapping. | ||
| 530 | * | ||
| 531 | * @tmp_dext: the extent that will belong to the original inode | ||
| 532 | * @tmp_oext: the extent that will belong to the donor inode | ||
| 533 | * @orig_off: block offset of original inode | ||
| 534 | * @donor_off: block offset of donor inode | ||
| 535 | * @max_count: the maximum length of extents | ||
| 536 | * | ||
| 537 | * Return 0 on success, or a negative error value on failure. | ||
| 538 | */ | ||
| 539 | static int | ||
| 540 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, | ||
| 541 | struct ext4_extent *tmp_oext, | ||
| 542 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, | ||
| 543 | ext4_lblk_t max_count) | ||
| 544 | { | ||
| 545 | ext4_lblk_t diff, orig_diff; | ||
| 546 | struct ext4_extent dext_old, oext_old; | ||
| 547 | |||
| 548 | BUG_ON(orig_off != donor_off); | ||
| 549 | |||
| 550 | /* original and donor extents have to cover the same block offset */ | ||
| 551 | if (orig_off < le32_to_cpu(tmp_oext->ee_block) || | ||
| 552 | le32_to_cpu(tmp_oext->ee_block) + | ||
| 553 | ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) | ||
| 554 | return -ENODATA; | ||
| 555 | |||
| 556 | if (orig_off < le32_to_cpu(tmp_dext->ee_block) || | ||
| 557 | le32_to_cpu(tmp_dext->ee_block) + | ||
| 558 | ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) | ||
| 559 | return -ENODATA; | ||
| 560 | |||
| 561 | dext_old = *tmp_dext; | ||
| 562 | oext_old = *tmp_oext; | ||
| 563 | |||
| 564 | /* When tmp_dext is too large, pick up the target range. */ | ||
| 565 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | ||
| 566 | |||
| 567 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); | ||
| 568 | le32_add_cpu(&tmp_dext->ee_block, diff); | ||
| 569 | le16_add_cpu(&tmp_dext->ee_len, -diff); | ||
| 570 | |||
| 571 | if (max_count < ext4_ext_get_actual_len(tmp_dext)) | ||
| 572 | tmp_dext->ee_len = cpu_to_le16(max_count); | ||
| 573 | |||
| 574 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); | ||
| 575 | ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); | ||
| 576 | |||
| 577 | /* Adjust extent length if donor extent is larger than orig */ | ||
| 578 | if (ext4_ext_get_actual_len(tmp_dext) > | ||
| 579 | ext4_ext_get_actual_len(tmp_oext) - orig_diff) | ||
| 580 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) - | ||
| 581 | orig_diff); | ||
| 582 | |||
| 583 | tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext)); | ||
| 584 | |||
| 585 | copy_extent_status(&oext_old, tmp_dext); | ||
| 586 | copy_extent_status(&dext_old, tmp_oext); | ||
| 587 | |||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | |||
| 591 | /** | ||
| 592 | * mext_check_coverage - Check that all extents in range has the same type | 87 | * mext_check_coverage - Check that all extents in range has the same type |
| 593 | * | 88 | * |
| 594 | * @inode: inode in question | 89 | * @inode: inode in question |
| @@ -619,171 +114,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, | |||
| 619 | } | 114 | } |
| 620 | ret = 1; | 115 | ret = 1; |
| 621 | out: | 116 | out: |
| 622 | if (path) { | 117 | ext4_ext_drop_refs(path); |
| 623 | ext4_ext_drop_refs(path); | 118 | kfree(path); |
| 624 | kfree(path); | ||
| 625 | } | ||
| 626 | return ret; | 119 | return ret; |
| 627 | } | 120 | } |
| 628 | 121 | ||
| 629 | /** | 122 | /** |
| 630 | * mext_replace_branches - Replace original extents with new extents | ||
| 631 | * | ||
| 632 | * @handle: journal handle | ||
| 633 | * @orig_inode: original inode | ||
| 634 | * @donor_inode: donor inode | ||
| 635 | * @from: block offset of orig_inode | ||
| 636 | * @count: block count to be replaced | ||
| 637 | * @err: pointer to save return value | ||
| 638 | * | ||
| 639 | * Replace original inode extents and donor inode extents page by page. | ||
| 640 | * We implement this replacement in the following three steps: | ||
| 641 | * 1. Save the block information of original and donor inodes into | ||
| 642 | * dummy extents. | ||
| 643 | * 2. Change the block information of original inode to point at the | ||
| 644 | * donor inode blocks. | ||
| 645 | * 3. Change the block information of donor inode to point at the saved | ||
| 646 | * original inode blocks in the dummy extents. | ||
| 647 | * | ||
| 648 | * Return replaced block count. | ||
| 649 | */ | ||
| 650 | static int | ||
| 651 | mext_replace_branches(handle_t *handle, struct inode *orig_inode, | ||
| 652 | struct inode *donor_inode, ext4_lblk_t from, | ||
| 653 | ext4_lblk_t count, int *err) | ||
| 654 | { | ||
| 655 | struct ext4_ext_path *orig_path = NULL; | ||
| 656 | struct ext4_ext_path *donor_path = NULL; | ||
| 657 | struct ext4_extent *oext, *dext; | ||
| 658 | struct ext4_extent tmp_dext, tmp_oext; | ||
| 659 | ext4_lblk_t orig_off = from, donor_off = from; | ||
| 660 | int depth; | ||
| 661 | int replaced_count = 0; | ||
| 662 | int dext_alen; | ||
| 663 | |||
| 664 | *err = ext4_es_remove_extent(orig_inode, from, count); | ||
| 665 | if (*err) | ||
| 666 | goto out; | ||
| 667 | |||
| 668 | *err = ext4_es_remove_extent(donor_inode, from, count); | ||
| 669 | if (*err) | ||
| 670 | goto out; | ||
| 671 | |||
| 672 | /* Get the original extent for the block "orig_off" */ | ||
| 673 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | ||
| 674 | if (*err) | ||
| 675 | goto out; | ||
| 676 | |||
| 677 | /* Get the donor extent for the head */ | ||
| 678 | *err = get_ext_path(donor_inode, donor_off, &donor_path); | ||
| 679 | if (*err) | ||
| 680 | goto out; | ||
| 681 | depth = ext_depth(orig_inode); | ||
| 682 | oext = orig_path[depth].p_ext; | ||
| 683 | tmp_oext = *oext; | ||
| 684 | |||
| 685 | depth = ext_depth(donor_inode); | ||
| 686 | dext = donor_path[depth].p_ext; | ||
| 687 | if (unlikely(!dext)) | ||
| 688 | goto missing_donor_extent; | ||
| 689 | tmp_dext = *dext; | ||
| 690 | |||
| 691 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | ||
| 692 | donor_off, count); | ||
| 693 | if (*err) | ||
| 694 | goto out; | ||
| 695 | |||
| 696 | /* Loop for the donor extents */ | ||
| 697 | while (1) { | ||
| 698 | /* The extent for donor must be found. */ | ||
| 699 | if (unlikely(!dext)) { | ||
| 700 | missing_donor_extent: | ||
| 701 | EXT4_ERROR_INODE(donor_inode, | ||
| 702 | "The extent for donor must be found"); | ||
| 703 | *err = -EIO; | ||
| 704 | goto out; | ||
| 705 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | ||
| 706 | EXT4_ERROR_INODE(donor_inode, | ||
| 707 | "Donor offset(%u) and the first block of donor " | ||
| 708 | "extent(%u) should be equal", | ||
| 709 | donor_off, | ||
| 710 | le32_to_cpu(tmp_dext.ee_block)); | ||
| 711 | *err = -EIO; | ||
| 712 | goto out; | ||
| 713 | } | ||
| 714 | |||
| 715 | /* Set donor extent to orig extent */ | ||
| 716 | *err = mext_leaf_block(handle, orig_inode, | ||
| 717 | orig_path, &tmp_dext, &orig_off); | ||
| 718 | if (*err) | ||
| 719 | goto out; | ||
| 720 | |||
| 721 | /* Set orig extent to donor extent */ | ||
| 722 | *err = mext_leaf_block(handle, donor_inode, | ||
| 723 | donor_path, &tmp_oext, &donor_off); | ||
| 724 | if (*err) | ||
| 725 | goto out; | ||
| 726 | |||
| 727 | dext_alen = ext4_ext_get_actual_len(&tmp_dext); | ||
| 728 | replaced_count += dext_alen; | ||
| 729 | donor_off += dext_alen; | ||
| 730 | orig_off += dext_alen; | ||
| 731 | |||
| 732 | BUG_ON(replaced_count > count); | ||
| 733 | /* Already moved the expected blocks */ | ||
| 734 | if (replaced_count >= count) | ||
| 735 | break; | ||
| 736 | |||
| 737 | if (orig_path) | ||
| 738 | ext4_ext_drop_refs(orig_path); | ||
| 739 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | ||
| 740 | if (*err) | ||
| 741 | goto out; | ||
| 742 | depth = ext_depth(orig_inode); | ||
| 743 | oext = orig_path[depth].p_ext; | ||
| 744 | tmp_oext = *oext; | ||
| 745 | |||
| 746 | if (donor_path) | ||
| 747 | ext4_ext_drop_refs(donor_path); | ||
| 748 | *err = get_ext_path(donor_inode, donor_off, &donor_path); | ||
| 749 | if (*err) | ||
| 750 | goto out; | ||
| 751 | depth = ext_depth(donor_inode); | ||
| 752 | dext = donor_path[depth].p_ext; | ||
| 753 | tmp_dext = *dext; | ||
| 754 | |||
| 755 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | ||
| 756 | donor_off, count - replaced_count); | ||
| 757 | if (*err) | ||
| 758 | goto out; | ||
| 759 | } | ||
| 760 | |||
| 761 | out: | ||
| 762 | if (orig_path) { | ||
| 763 | ext4_ext_drop_refs(orig_path); | ||
| 764 | kfree(orig_path); | ||
| 765 | } | ||
| 766 | if (donor_path) { | ||
| 767 | ext4_ext_drop_refs(donor_path); | ||
| 768 | kfree(donor_path); | ||
| 769 | } | ||
| 770 | |||
| 771 | return replaced_count; | ||
| 772 | } | ||
| 773 | |||
| 774 | /** | ||
| 775 | * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 | 123 | * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 |
| 776 | * | 124 | * |
| 777 | * @inode1: the inode structure | 125 | * @inode1: the inode structure |
| 778 | * @inode2: the inode structure | 126 | * @inode2: the inode structure |
| 779 | * @index: page index | 127 | * @index1: page index |
| 128 | * @index2: page index | ||
| 780 | * @page: result page vector | 129 | * @page: result page vector |
| 781 | * | 130 | * |
| 782 | * Grab two locked pages for inode's by inode order | 131 | * Grab two locked pages for inode's by inode order |
| 783 | */ | 132 | */ |
| 784 | static int | 133 | static int |
| 785 | mext_page_double_lock(struct inode *inode1, struct inode *inode2, | 134 | mext_page_double_lock(struct inode *inode1, struct inode *inode2, |
| 786 | pgoff_t index, struct page *page[2]) | 135 | pgoff_t index1, pgoff_t index2, struct page *page[2]) |
| 787 | { | 136 | { |
| 788 | struct address_space *mapping[2]; | 137 | struct address_space *mapping[2]; |
| 789 | unsigned fl = AOP_FLAG_NOFS; | 138 | unsigned fl = AOP_FLAG_NOFS; |
| @@ -793,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, | |||
| 793 | mapping[0] = inode1->i_mapping; | 142 | mapping[0] = inode1->i_mapping; |
| 794 | mapping[1] = inode2->i_mapping; | 143 | mapping[1] = inode2->i_mapping; |
| 795 | } else { | 144 | } else { |
| 145 | pgoff_t tmp = index1; | ||
| 146 | index1 = index2; | ||
| 147 | index2 = tmp; | ||
| 796 | mapping[0] = inode2->i_mapping; | 148 | mapping[0] = inode2->i_mapping; |
| 797 | mapping[1] = inode1->i_mapping; | 149 | mapping[1] = inode1->i_mapping; |
| 798 | } | 150 | } |
| 799 | 151 | ||
| 800 | page[0] = grab_cache_page_write_begin(mapping[0], index, fl); | 152 | page[0] = grab_cache_page_write_begin(mapping[0], index1, fl); |
| 801 | if (!page[0]) | 153 | if (!page[0]) |
| 802 | return -ENOMEM; | 154 | return -ENOMEM; |
| 803 | 155 | ||
| 804 | page[1] = grab_cache_page_write_begin(mapping[1], index, fl); | 156 | page[1] = grab_cache_page_write_begin(mapping[1], index2, fl); |
| 805 | if (!page[1]) { | 157 | if (!page[1]) { |
| 806 | unlock_page(page[0]); | 158 | unlock_page(page[0]); |
| 807 | page_cache_release(page[0]); | 159 | page_cache_release(page[0]); |
| @@ -893,25 +245,27 @@ out: | |||
| 893 | * @o_filp: file structure of original file | 245 | * @o_filp: file structure of original file |
| 894 | * @donor_inode: donor inode | 246 | * @donor_inode: donor inode |
| 895 | * @orig_page_offset: page index on original file | 247 | * @orig_page_offset: page index on original file |
| 248 | * @donor_page_offset: page index on donor file | ||
| 896 | * @data_offset_in_page: block index where data swapping starts | 249 | * @data_offset_in_page: block index where data swapping starts |
| 897 | * @block_len_in_page: the number of blocks to be swapped | 250 | * @block_len_in_page: the number of blocks to be swapped |
| 898 | * @unwritten: orig extent is unwritten or not | 251 | * @unwritten: orig extent is unwritten or not |
| 899 | * @err: pointer to save return value | 252 | * @err: pointer to save return value |
| 900 | * | 253 | * |
| 901 | * Save the data in original inode blocks and replace original inode extents | 254 | * Save the data in original inode blocks and replace original inode extents |
| 902 | * with donor inode extents by calling mext_replace_branches(). | 255 | * with donor inode extents by calling ext4_swap_extents(). |
| 903 | * Finally, write out the saved data in new original inode blocks. Return | 256 | * Finally, write out the saved data in new original inode blocks. Return |
| 904 | * replaced block count. | 257 | * replaced block count. |
| 905 | */ | 258 | */ |
| 906 | static int | 259 | static int |
| 907 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | 260 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
| 908 | pgoff_t orig_page_offset, int data_offset_in_page, | 261 | pgoff_t orig_page_offset, pgoff_t donor_page_offset, |
| 909 | int block_len_in_page, int unwritten, int *err) | 262 | int data_offset_in_page, |
| 263 | int block_len_in_page, int unwritten, int *err) | ||
| 910 | { | 264 | { |
| 911 | struct inode *orig_inode = file_inode(o_filp); | 265 | struct inode *orig_inode = file_inode(o_filp); |
| 912 | struct page *pagep[2] = {NULL, NULL}; | 266 | struct page *pagep[2] = {NULL, NULL}; |
| 913 | handle_t *handle; | 267 | handle_t *handle; |
| 914 | ext4_lblk_t orig_blk_offset; | 268 | ext4_lblk_t orig_blk_offset, donor_blk_offset; |
| 915 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 269 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
| 916 | unsigned int w_flags = 0; | 270 | unsigned int w_flags = 0; |
| 917 | unsigned int tmp_data_size, data_size, replaced_size; | 271 | unsigned int tmp_data_size, data_size, replaced_size; |
| @@ -939,6 +293,9 @@ again: | |||
| 939 | orig_blk_offset = orig_page_offset * blocks_per_page + | 293 | orig_blk_offset = orig_page_offset * blocks_per_page + |
| 940 | data_offset_in_page; | 294 | data_offset_in_page; |
| 941 | 295 | ||
| 296 | donor_blk_offset = donor_page_offset * blocks_per_page + | ||
| 297 | data_offset_in_page; | ||
| 298 | |||
| 942 | /* Calculate data_size */ | 299 | /* Calculate data_size */ |
| 943 | if ((orig_blk_offset + block_len_in_page - 1) == | 300 | if ((orig_blk_offset + block_len_in_page - 1) == |
| 944 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 301 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
| @@ -959,7 +316,7 @@ again: | |||
| 959 | replaced_size = data_size; | 316 | replaced_size = data_size; |
| 960 | 317 | ||
| 961 | *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, | 318 | *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, |
| 962 | pagep); | 319 | donor_page_offset, pagep); |
| 963 | if (unlikely(*err < 0)) | 320 | if (unlikely(*err < 0)) |
| 964 | goto stop_journal; | 321 | goto stop_journal; |
| 965 | /* | 322 | /* |
| @@ -978,7 +335,7 @@ again: | |||
| 978 | if (*err) | 335 | if (*err) |
| 979 | goto drop_data_sem; | 336 | goto drop_data_sem; |
| 980 | 337 | ||
| 981 | unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, | 338 | unwritten &= mext_check_coverage(donor_inode, donor_blk_offset, |
| 982 | block_len_in_page, 1, err); | 339 | block_len_in_page, 1, err); |
| 983 | if (*err) | 340 | if (*err) |
| 984 | goto drop_data_sem; | 341 | goto drop_data_sem; |
| @@ -994,9 +351,10 @@ again: | |||
| 994 | *err = -EBUSY; | 351 | *err = -EBUSY; |
| 995 | goto drop_data_sem; | 352 | goto drop_data_sem; |
| 996 | } | 353 | } |
| 997 | replaced_count = mext_replace_branches(handle, orig_inode, | 354 | replaced_count = ext4_swap_extents(handle, orig_inode, |
| 998 | donor_inode, orig_blk_offset, | 355 | donor_inode, orig_blk_offset, |
| 999 | block_len_in_page, err); | 356 | donor_blk_offset, |
| 357 | block_len_in_page, 1, err); | ||
| 1000 | drop_data_sem: | 358 | drop_data_sem: |
| 1001 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 359 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
| 1002 | goto unlock_pages; | 360 | goto unlock_pages; |
| @@ -1014,9 +372,9 @@ data_copy: | |||
| 1014 | goto unlock_pages; | 372 | goto unlock_pages; |
| 1015 | } | 373 | } |
| 1016 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 374 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
| 1017 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, | 375 | replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode, |
| 1018 | orig_blk_offset, | 376 | orig_blk_offset, donor_blk_offset, |
| 1019 | block_len_in_page, err); | 377 | block_len_in_page, 1, err); |
| 1020 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 378 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
| 1021 | if (*err) { | 379 | if (*err) { |
| 1022 | if (replaced_count) { | 380 | if (replaced_count) { |
| @@ -1061,9 +419,9 @@ repair_branches: | |||
| 1061 | * Try to swap extents to it's original places | 419 | * Try to swap extents to it's original places |
| 1062 | */ | 420 | */ |
| 1063 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 421 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
| 1064 | replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, | 422 | replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode, |
| 1065 | orig_blk_offset, | 423 | orig_blk_offset, donor_blk_offset, |
| 1066 | block_len_in_page, &err2); | 424 | block_len_in_page, 0, &err2); |
| 1067 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 425 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
| 1068 | if (replaced_count != block_len_in_page) { | 426 | if (replaced_count != block_len_in_page) { |
| 1069 | EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), | 427 | EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), |
| @@ -1093,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode, | |||
| 1093 | struct inode *donor_inode, __u64 orig_start, | 451 | struct inode *donor_inode, __u64 orig_start, |
| 1094 | __u64 donor_start, __u64 *len) | 452 | __u64 donor_start, __u64 *len) |
| 1095 | { | 453 | { |
| 1096 | ext4_lblk_t orig_blocks, donor_blocks; | 454 | __u64 orig_eof, donor_eof; |
| 1097 | unsigned int blkbits = orig_inode->i_blkbits; | 455 | unsigned int blkbits = orig_inode->i_blkbits; |
| 1098 | unsigned int blocksize = 1 << blkbits; | 456 | unsigned int blocksize = 1 << blkbits; |
| 1099 | 457 | ||
| 458 | orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits; | ||
| 459 | donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits; | ||
| 460 | |||
| 461 | |||
| 1100 | if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { | 462 | if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { |
| 1101 | ext4_debug("ext4 move extent: suid or sgid is set" | 463 | ext4_debug("ext4 move extent: suid or sgid is set" |
| 1102 | " to donor file [ino:orig %lu, donor %lu]\n", | 464 | " to donor file [ino:orig %lu, donor %lu]\n", |
| @@ -1112,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode, | |||
| 1112 | ext4_debug("ext4 move extent: The argument files should " | 474 | ext4_debug("ext4 move extent: The argument files should " |
| 1113 | "not be swapfile [ino:orig %lu, donor %lu]\n", | 475 | "not be swapfile [ino:orig %lu, donor %lu]\n", |
| 1114 | orig_inode->i_ino, donor_inode->i_ino); | 476 | orig_inode->i_ino, donor_inode->i_ino); |
| 1115 | return -EINVAL; | 477 | return -EBUSY; |
| 1116 | } | 478 | } |
| 1117 | 479 | ||
| 1118 | /* Ext4 move extent supports only extent based file */ | 480 | /* Ext4 move extent supports only extent based file */ |
| @@ -1132,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode, | |||
| 1132 | } | 494 | } |
| 1133 | 495 | ||
| 1134 | /* Start offset should be same */ | 496 | /* Start offset should be same */ |
| 1135 | if (orig_start != donor_start) { | 497 | if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) != |
| 498 | (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) { | ||
| 1136 | ext4_debug("ext4 move extent: orig and donor's start " | 499 | ext4_debug("ext4 move extent: orig and donor's start " |
| 1137 | "offset are not same [ino:orig %lu, donor %lu]\n", | 500 | "offset are not alligned [ino:orig %lu, donor %lu]\n", |
| 1138 | orig_inode->i_ino, donor_inode->i_ino); | 501 | orig_inode->i_ino, donor_inode->i_ino); |
| 1139 | return -EINVAL; | 502 | return -EINVAL; |
| 1140 | } | 503 | } |
| 1141 | 504 | ||
| 1142 | if ((orig_start >= EXT_MAX_BLOCKS) || | 505 | if ((orig_start >= EXT_MAX_BLOCKS) || |
| 506 | (donor_start >= EXT_MAX_BLOCKS) || | ||
| 1143 | (*len > EXT_MAX_BLOCKS) || | 507 | (*len > EXT_MAX_BLOCKS) || |
| 508 | (donor_start + *len >= EXT_MAX_BLOCKS) || | ||
| 1144 | (orig_start + *len >= EXT_MAX_BLOCKS)) { | 509 | (orig_start + *len >= EXT_MAX_BLOCKS)) { |
| 1145 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " | 510 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
| 1146 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, | 511 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, |
| 1147 | orig_inode->i_ino, donor_inode->i_ino); | 512 | orig_inode->i_ino, donor_inode->i_ino); |
| 1148 | return -EINVAL; | 513 | return -EINVAL; |
| 1149 | } | 514 | } |
| 1150 | 515 | if (orig_eof < orig_start + *len - 1) | |
| 1151 | if (orig_inode->i_size > donor_inode->i_size) { | 516 | *len = orig_eof - orig_start; |
| 1152 | donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; | 517 | if (donor_eof < donor_start + *len - 1) |
| 1153 | /* TODO: eliminate this artificial restriction */ | 518 | *len = donor_eof - donor_start; |
| 1154 | if (orig_start >= donor_blocks) { | ||
| 1155 | ext4_debug("ext4 move extent: orig start offset " | ||
| 1156 | "[%llu] should be less than donor file blocks " | ||
| 1157 | "[%u] [ino:orig %lu, donor %lu]\n", | ||
| 1158 | orig_start, donor_blocks, | ||
| 1159 | orig_inode->i_ino, donor_inode->i_ino); | ||
| 1160 | return -EINVAL; | ||
| 1161 | } | ||
| 1162 | |||
| 1163 | /* TODO: eliminate this artificial restriction */ | ||
| 1164 | if (orig_start + *len > donor_blocks) { | ||
| 1165 | ext4_debug("ext4 move extent: End offset [%llu] should " | ||
| 1166 | "be less than donor file blocks [%u]." | ||
| 1167 | "So adjust length from %llu to %llu " | ||
| 1168 | "[ino:orig %lu, donor %lu]\n", | ||
| 1169 | orig_start + *len, donor_blocks, | ||
| 1170 | *len, donor_blocks - orig_start, | ||
| 1171 | orig_inode->i_ino, donor_inode->i_ino); | ||
| 1172 | *len = donor_blocks - orig_start; | ||
| 1173 | } | ||
| 1174 | } else { | ||
| 1175 | orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; | ||
| 1176 | if (orig_start >= orig_blocks) { | ||
| 1177 | ext4_debug("ext4 move extent: start offset [%llu] " | ||
| 1178 | "should be less than original file blocks " | ||
| 1179 | "[%u] [ino:orig %lu, donor %lu]\n", | ||
| 1180 | orig_start, orig_blocks, | ||
| 1181 | orig_inode->i_ino, donor_inode->i_ino); | ||
| 1182 | return -EINVAL; | ||
| 1183 | } | ||
| 1184 | |||
| 1185 | if (orig_start + *len > orig_blocks) { | ||
| 1186 | ext4_debug("ext4 move extent: Adjust length " | ||
| 1187 | "from %llu to %llu. Because it should be " | ||
| 1188 | "less than original file blocks " | ||
| 1189 | "[ino:orig %lu, donor %lu]\n", | ||
| 1190 | *len, orig_blocks - orig_start, | ||
| 1191 | orig_inode->i_ino, donor_inode->i_ino); | ||
| 1192 | *len = orig_blocks - orig_start; | ||
| 1193 | } | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | if (!*len) { | 519 | if (!*len) { |
| 1197 | ext4_debug("ext4 move extent: len should not be 0 " | 520 | ext4_debug("ext4 move extent: len should not be 0 " |
| 1198 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | 521 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, |
| @@ -1208,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode, | |||
| 1208 | * | 531 | * |
| 1209 | * @o_filp: file structure of the original file | 532 | * @o_filp: file structure of the original file |
| 1210 | * @d_filp: file structure of the donor file | 533 | * @d_filp: file structure of the donor file |
| 1211 | * @orig_start: start offset in block for orig | 534 | * @orig_blk: start offset in block for orig |
| 1212 | * @donor_start: start offset in block for donor | 535 | * @donor_blk: start offset in block for donor |
| 1213 | * @len: the number of blocks to be moved | 536 | * @len: the number of blocks to be moved |
| 1214 | * @moved_len: moved block length | 537 | * @moved_len: moved block length |
| 1215 | * | 538 | * |
| 1216 | * This function returns 0 and moved block length is set in moved_len | 539 | * This function returns 0 and moved block length is set in moved_len |
| 1217 | * if succeed, otherwise returns error value. | 540 | * if succeed, otherwise returns error value. |
| 1218 | * | 541 | * |
| 1219 | * Note: ext4_move_extents() proceeds the following order. | ||
| 1220 | * 1:ext4_move_extents() calculates the last block number of moving extent | ||
| 1221 | * function by the start block number (orig_start) and the number of blocks | ||
| 1222 | * to be moved (len) specified as arguments. | ||
| 1223 | * If the {orig, donor}_start points a hole, the extent's start offset | ||
| 1224 | * pointed by ext_cur (current extent), holecheck_path, orig_path are set | ||
| 1225 | * after hole behind. | ||
| 1226 | * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent | ||
| 1227 | * or the ext_cur exceeds the block_end which is last logical block number. | ||
| 1228 | * 3:To get the length of continues area, call mext_next_extent() | ||
| 1229 | * specified with the ext_cur (initial value is holecheck_path) re-cursive, | ||
| 1230 | * until find un-continuous extent, the start logical block number exceeds | ||
| 1231 | * the block_end or the extent points to the last extent. | ||
| 1232 | * 4:Exchange the original inode data with donor inode data | ||
| 1233 | * from orig_page_offset to seq_end_page. | ||
| 1234 | * The start indexes of data are specified as arguments. | ||
| 1235 | * That of the original inode is orig_page_offset, | ||
| 1236 | * and the donor inode is also orig_page_offset | ||
| 1237 | * (To easily handle blocksize != pagesize case, the offset for the | ||
| 1238 | * donor inode is block unit). | ||
| 1239 | * 5:Update holecheck_path and orig_path to points a next proceeding extent, | ||
| 1240 | * then returns to step 2. | ||
| 1241 | * 6:Release holecheck_path, orig_path and set the len to moved_len | ||
| 1242 | * which shows the number of moved blocks. | ||
| 1243 | * The moved_len is useful for the command to calculate the file offset | ||
| 1244 | * for starting next move extent ioctl. | ||
| 1245 | * 7:Return 0 on success, or a negative error value on failure. | ||
| 1246 | */ | 542 | */ |
| 1247 | int | 543 | int |
| 1248 | ext4_move_extents(struct file *o_filp, struct file *d_filp, | 544 | ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, |
| 1249 | __u64 orig_start, __u64 donor_start, __u64 len, | 545 | __u64 donor_blk, __u64 len, __u64 *moved_len) |
| 1250 | __u64 *moved_len) | ||
| 1251 | { | 546 | { |
| 1252 | struct inode *orig_inode = file_inode(o_filp); | 547 | struct inode *orig_inode = file_inode(o_filp); |
| 1253 | struct inode *donor_inode = file_inode(d_filp); | 548 | struct inode *donor_inode = file_inode(d_filp); |
| 1254 | struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; | 549 | struct ext4_ext_path *path = NULL; |
| 1255 | struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; | ||
| 1256 | ext4_lblk_t block_start = orig_start; | ||
| 1257 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | ||
| 1258 | ext4_lblk_t rest_blocks; | ||
| 1259 | pgoff_t orig_page_offset = 0, seq_end_page; | ||
| 1260 | int ret, depth, last_extent = 0; | ||
| 1261 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 550 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
| 1262 | int data_offset_in_page; | 551 | ext4_lblk_t o_end, o_start = orig_blk; |
| 1263 | int block_len_in_page; | 552 | ext4_lblk_t d_start = donor_blk; |
| 1264 | int unwritten; | 553 | int ret; |
| 1265 | 554 | ||
| 1266 | if (orig_inode->i_sb != donor_inode->i_sb) { | 555 | if (orig_inode->i_sb != donor_inode->i_sb) { |
| 1267 | ext4_debug("ext4 move extent: The argument files " | 556 | ext4_debug("ext4 move extent: The argument files " |
| @@ -1303,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 1303 | /* Protect extent tree against block allocations via delalloc */ | 592 | /* Protect extent tree against block allocations via delalloc */ |
| 1304 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 593 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
| 1305 | /* Check the filesystem environment whether move_extent can be done */ | 594 | /* Check the filesystem environment whether move_extent can be done */ |
| 1306 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | 595 | ret = mext_check_arguments(orig_inode, donor_inode, orig_blk, |
| 1307 | donor_start, &len); | 596 | donor_blk, &len); |
| 1308 | if (ret) | 597 | if (ret) |
| 1309 | goto out; | 598 | goto out; |
| 599 | o_end = o_start + len; | ||
| 1310 | 600 | ||
| 1311 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 601 | while (o_start < o_end) { |
| 1312 | block_end = block_start + len - 1; | 602 | struct ext4_extent *ex; |
| 1313 | if (file_end < block_end) | 603 | ext4_lblk_t cur_blk, next_blk; |
| 1314 | len -= block_end - file_end; | 604 | pgoff_t orig_page_index, donor_page_index; |
| 605 | int offset_in_page; | ||
| 606 | int unwritten, cur_len; | ||
| 1315 | 607 | ||
| 1316 | ret = get_ext_path(orig_inode, block_start, &orig_path); | 608 | ret = get_ext_path(orig_inode, o_start, &path); |
| 1317 | if (ret) | 609 | if (ret) |
| 1318 | goto out; | ||
| 1319 | |||
| 1320 | /* Get path structure to check the hole */ | ||
| 1321 | ret = get_ext_path(orig_inode, block_start, &holecheck_path); | ||
| 1322 | if (ret) | ||
| 1323 | goto out; | ||
| 1324 | |||
| 1325 | depth = ext_depth(orig_inode); | ||
| 1326 | ext_cur = holecheck_path[depth].p_ext; | ||
| 1327 | |||
| 1328 | /* | ||
| 1329 | * Get proper starting location of block replacement if block_start was | ||
| 1330 | * within the hole. | ||
| 1331 | */ | ||
| 1332 | if (le32_to_cpu(ext_cur->ee_block) + | ||
| 1333 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { | ||
| 1334 | /* | ||
| 1335 | * The hole exists between extents or the tail of | ||
| 1336 | * original file. | ||
| 1337 | */ | ||
| 1338 | last_extent = mext_next_extent(orig_inode, | ||
| 1339 | holecheck_path, &ext_cur); | ||
| 1340 | if (last_extent < 0) { | ||
| 1341 | ret = last_extent; | ||
| 1342 | goto out; | ||
| 1343 | } | ||
| 1344 | last_extent = mext_next_extent(orig_inode, orig_path, | ||
| 1345 | &ext_dummy); | ||
| 1346 | if (last_extent < 0) { | ||
| 1347 | ret = last_extent; | ||
| 1348 | goto out; | 610 | goto out; |
| 1349 | } | 611 | ex = path[path->p_depth].p_ext; |
| 1350 | seq_start = le32_to_cpu(ext_cur->ee_block); | 612 | next_blk = ext4_ext_next_allocated_block(path); |
| 1351 | } else if (le32_to_cpu(ext_cur->ee_block) > block_start) | 613 | cur_blk = le32_to_cpu(ex->ee_block); |
| 1352 | /* The hole exists at the beginning of original file. */ | 614 | cur_len = ext4_ext_get_actual_len(ex); |
| 1353 | seq_start = le32_to_cpu(ext_cur->ee_block); | 615 | /* Check hole before the start pos */ |
| 1354 | else | 616 | if (cur_blk + cur_len - 1 < o_start) { |
| 1355 | seq_start = block_start; | 617 | if (next_blk == EXT_MAX_BLOCKS) { |
| 1356 | 618 | o_start = o_end; | |
| 1357 | /* No blocks within the specified range. */ | 619 | ret = -ENODATA; |
| 1358 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 620 | goto out; |
| 1359 | ext4_debug("ext4 move extent: The specified range of file " | 621 | } |
| 1360 | "may be the hole\n"); | 622 | d_start += next_blk - o_start; |
| 1361 | ret = -EINVAL; | 623 | o_start = next_blk; |
| 1362 | goto out; | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | /* Adjust start blocks */ | ||
| 1366 | add_blocks = min(le32_to_cpu(ext_cur->ee_block) + | ||
| 1367 | ext4_ext_get_actual_len(ext_cur), block_end + 1) - | ||
| 1368 | max(le32_to_cpu(ext_cur->ee_block), block_start); | ||
| 1369 | |||
| 1370 | while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { | ||
| 1371 | seq_blocks += add_blocks; | ||
| 1372 | |||
| 1373 | /* Adjust tail blocks */ | ||
| 1374 | if (seq_start + seq_blocks - 1 > block_end) | ||
| 1375 | seq_blocks = block_end - seq_start + 1; | ||
| 1376 | |||
| 1377 | ext_prev = ext_cur; | ||
| 1378 | last_extent = mext_next_extent(orig_inode, holecheck_path, | ||
| 1379 | &ext_cur); | ||
| 1380 | if (last_extent < 0) { | ||
| 1381 | ret = last_extent; | ||
| 1382 | break; | ||
| 1383 | } | ||
| 1384 | add_blocks = ext4_ext_get_actual_len(ext_cur); | ||
| 1385 | |||
| 1386 | /* | ||
| 1387 | * Extend the length of contiguous block (seq_blocks) | ||
| 1388 | * if extents are contiguous. | ||
| 1389 | */ | ||
| 1390 | if (ext4_can_extents_be_merged(orig_inode, | ||
| 1391 | ext_prev, ext_cur) && | ||
| 1392 | block_end >= le32_to_cpu(ext_cur->ee_block) && | ||
| 1393 | !last_extent) | ||
| 1394 | continue; | 624 | continue; |
| 1395 | 625 | /* Check hole after the start pos */ | |
| 1396 | /* Is original extent is unwritten */ | 626 | } else if (cur_blk > o_start) { |
| 1397 | unwritten = ext4_ext_is_unwritten(ext_prev); | 627 | /* Skip hole */ |
| 1398 | 628 | d_start += cur_blk - o_start; | |
| 1399 | data_offset_in_page = seq_start % blocks_per_page; | 629 | o_start = cur_blk; |
| 1400 | 630 | /* Extent inside requested range ?*/ | |
| 1401 | /* | 631 | if (cur_blk >= o_end) |
| 1402 | * Calculate data blocks count that should be swapped | 632 | goto out; |
| 1403 | * at the first page. | 633 | } else { /* in_range(o_start, o_blk, o_len) */ |
| 1404 | */ | 634 | cur_len += cur_blk - o_start; |
| 1405 | if (data_offset_in_page + seq_blocks > blocks_per_page) { | ||
| 1406 | /* Swapped blocks are across pages */ | ||
| 1407 | block_len_in_page = | ||
| 1408 | blocks_per_page - data_offset_in_page; | ||
| 1409 | } else { | ||
| 1410 | /* Swapped blocks are in a page */ | ||
| 1411 | block_len_in_page = seq_blocks; | ||
| 1412 | } | 635 | } |
| 1413 | 636 | unwritten = ext4_ext_is_unwritten(ex); | |
| 1414 | orig_page_offset = seq_start >> | 637 | if (o_end - o_start < cur_len) |
| 1415 | (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); | 638 | cur_len = o_end - o_start; |
| 1416 | seq_end_page = (seq_start + seq_blocks - 1) >> | 639 | |
| 1417 | (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); | 640 | orig_page_index = o_start >> (PAGE_CACHE_SHIFT - |
| 1418 | seq_start = le32_to_cpu(ext_cur->ee_block); | 641 | orig_inode->i_blkbits); |
| 1419 | rest_blocks = seq_blocks; | 642 | donor_page_index = d_start >> (PAGE_CACHE_SHIFT - |
| 1420 | 643 | donor_inode->i_blkbits); | |
| 644 | offset_in_page = o_start % blocks_per_page; | ||
| 645 | if (cur_len > blocks_per_page- offset_in_page) | ||
| 646 | cur_len = blocks_per_page - offset_in_page; | ||
| 1421 | /* | 647 | /* |
| 1422 | * Up semaphore to avoid following problems: | 648 | * Up semaphore to avoid following problems: |
| 1423 | * a. transaction deadlock among ext4_journal_start, | 649 | * a. transaction deadlock among ext4_journal_start, |
| @@ -1426,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 1426 | * in move_extent_per_page | 652 | * in move_extent_per_page |
| 1427 | */ | 653 | */ |
| 1428 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 654 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
| 1429 | 655 | /* Swap original branches with new branches */ | |
| 1430 | while (orig_page_offset <= seq_end_page) { | 656 | move_extent_per_page(o_filp, donor_inode, |
| 1431 | 657 | orig_page_index, donor_page_index, | |
| 1432 | /* Swap original branches with new branches */ | 658 | offset_in_page, cur_len, |
| 1433 | block_len_in_page = move_extent_per_page( | 659 | unwritten, &ret); |
| 1434 | o_filp, donor_inode, | ||
| 1435 | orig_page_offset, | ||
| 1436 | data_offset_in_page, | ||
| 1437 | block_len_in_page, | ||
| 1438 | unwritten, &ret); | ||
| 1439 | |||
| 1440 | /* Count how many blocks we have exchanged */ | ||
| 1441 | *moved_len += block_len_in_page; | ||
| 1442 | if (ret < 0) | ||
| 1443 | break; | ||
| 1444 | if (*moved_len > len) { | ||
| 1445 | EXT4_ERROR_INODE(orig_inode, | ||
| 1446 | "We replaced blocks too much! " | ||
| 1447 | "sum of replaced: %llu requested: %llu", | ||
| 1448 | *moved_len, len); | ||
| 1449 | ret = -EIO; | ||
| 1450 | break; | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | orig_page_offset++; | ||
| 1454 | data_offset_in_page = 0; | ||
| 1455 | rest_blocks -= block_len_in_page; | ||
| 1456 | if (rest_blocks > blocks_per_page) | ||
| 1457 | block_len_in_page = blocks_per_page; | ||
| 1458 | else | ||
| 1459 | block_len_in_page = rest_blocks; | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | ext4_double_down_write_data_sem(orig_inode, donor_inode); | 660 | ext4_double_down_write_data_sem(orig_inode, donor_inode); |
| 1463 | if (ret < 0) | 661 | if (ret < 0) |
| 1464 | break; | 662 | break; |
| 1465 | 663 | o_start += cur_len; | |
| 1466 | /* Decrease buffer counter */ | 664 | d_start += cur_len; |
| 1467 | if (holecheck_path) | ||
| 1468 | ext4_ext_drop_refs(holecheck_path); | ||
| 1469 | ret = get_ext_path(orig_inode, seq_start, &holecheck_path); | ||
| 1470 | if (ret) | ||
| 1471 | break; | ||
| 1472 | depth = holecheck_path->p_depth; | ||
| 1473 | |||
| 1474 | /* Decrease buffer counter */ | ||
| 1475 | if (orig_path) | ||
| 1476 | ext4_ext_drop_refs(orig_path); | ||
| 1477 | ret = get_ext_path(orig_inode, seq_start, &orig_path); | ||
| 1478 | if (ret) | ||
| 1479 | break; | ||
| 1480 | |||
| 1481 | ext_cur = holecheck_path[depth].p_ext; | ||
| 1482 | add_blocks = ext4_ext_get_actual_len(ext_cur); | ||
| 1483 | seq_blocks = 0; | ||
| 1484 | |||
| 1485 | } | 665 | } |
| 666 | *moved_len = o_start - orig_blk; | ||
| 667 | if (*moved_len > len) | ||
| 668 | *moved_len = len; | ||
| 669 | |||
| 1486 | out: | 670 | out: |
| 1487 | if (*moved_len) { | 671 | if (*moved_len) { |
| 1488 | ext4_discard_preallocations(orig_inode); | 672 | ext4_discard_preallocations(orig_inode); |
| 1489 | ext4_discard_preallocations(donor_inode); | 673 | ext4_discard_preallocations(donor_inode); |
| 1490 | } | 674 | } |
| 1491 | 675 | ||
| 1492 | if (orig_path) { | 676 | ext4_ext_drop_refs(path); |
| 1493 | ext4_ext_drop_refs(orig_path); | 677 | kfree(path); |
| 1494 | kfree(orig_path); | ||
| 1495 | } | ||
| 1496 | if (holecheck_path) { | ||
| 1497 | ext4_ext_drop_refs(holecheck_path); | ||
| 1498 | kfree(holecheck_path); | ||
| 1499 | } | ||
| 1500 | ext4_double_up_write_data_sem(orig_inode, donor_inode); | 678 | ext4_double_up_write_data_sem(orig_inode, donor_inode); |
| 1501 | ext4_inode_resume_unlocked_dio(orig_inode); | 679 | ext4_inode_resume_unlocked_dio(orig_inode); |
| 1502 | ext4_inode_resume_unlocked_dio(donor_inode); | 680 | ext4_inode_resume_unlocked_dio(donor_inode); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 603e4ebbd0ac..426211882f72 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -53,7 +53,7 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
| 53 | ext4_lblk_t *block) | 53 | ext4_lblk_t *block) |
| 54 | { | 54 | { |
| 55 | struct buffer_head *bh; | 55 | struct buffer_head *bh; |
| 56 | int err = 0; | 56 | int err; |
| 57 | 57 | ||
| 58 | if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && | 58 | if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && |
| 59 | ((inode->i_size >> 10) >= | 59 | ((inode->i_size >> 10) >= |
| @@ -62,9 +62,9 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
| 62 | 62 | ||
| 63 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; | 63 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; |
| 64 | 64 | ||
| 65 | bh = ext4_bread(handle, inode, *block, 1, &err); | 65 | bh = ext4_bread(handle, inode, *block, 1); |
| 66 | if (!bh) | 66 | if (IS_ERR(bh)) |
| 67 | return ERR_PTR(err); | 67 | return bh; |
| 68 | inode->i_size += inode->i_sb->s_blocksize; | 68 | inode->i_size += inode->i_sb->s_blocksize; |
| 69 | EXT4_I(inode)->i_disksize = inode->i_size; | 69 | EXT4_I(inode)->i_disksize = inode->i_size; |
| 70 | BUFFER_TRACE(bh, "get_write_access"); | 70 | BUFFER_TRACE(bh, "get_write_access"); |
| @@ -94,20 +94,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, | |||
| 94 | { | 94 | { |
| 95 | struct buffer_head *bh; | 95 | struct buffer_head *bh; |
| 96 | struct ext4_dir_entry *dirent; | 96 | struct ext4_dir_entry *dirent; |
| 97 | int err = 0, is_dx_block = 0; | 97 | int is_dx_block = 0; |
| 98 | 98 | ||
| 99 | bh = ext4_bread(NULL, inode, block, 0, &err); | 99 | bh = ext4_bread(NULL, inode, block, 0); |
| 100 | if (!bh) { | 100 | if (IS_ERR(bh)) { |
| 101 | if (err == 0) { | ||
| 102 | ext4_error_inode(inode, __func__, line, block, | ||
| 103 | "Directory hole found"); | ||
| 104 | return ERR_PTR(-EIO); | ||
| 105 | } | ||
| 106 | __ext4_warning(inode->i_sb, __func__, line, | 101 | __ext4_warning(inode->i_sb, __func__, line, |
| 107 | "error reading directory block " | 102 | "error %ld reading directory block " |
| 108 | "(ino %lu, block %lu)", inode->i_ino, | 103 | "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino, |
| 109 | (unsigned long) block); | 104 | (unsigned long) block); |
| 110 | return ERR_PTR(err); | 105 | |
| 106 | return bh; | ||
| 107 | } | ||
| 108 | if (!bh) { | ||
| 109 | ext4_error_inode(inode, __func__, line, block, "Directory hole found"); | ||
| 110 | return ERR_PTR(-EIO); | ||
| 111 | } | 111 | } |
| 112 | dirent = (struct ext4_dir_entry *) bh->b_data; | 112 | dirent = (struct ext4_dir_entry *) bh->b_data; |
| 113 | /* Determine whether or not we have an index block */ | 113 | /* Determine whether or not we have an index block */ |
| @@ -124,8 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, | |||
| 124 | "directory leaf block found instead of index block"); | 124 | "directory leaf block found instead of index block"); |
| 125 | return ERR_PTR(-EIO); | 125 | return ERR_PTR(-EIO); |
| 126 | } | 126 | } |
| 127 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 127 | if (!ext4_has_metadata_csum(inode->i_sb) || |
| 128 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) || | ||
| 129 | buffer_verified(bh)) | 128 | buffer_verified(bh)) |
| 130 | return bh; | 129 | return bh; |
| 131 | 130 | ||
| @@ -253,8 +252,7 @@ static unsigned dx_node_limit(struct inode *dir); | |||
| 253 | static struct dx_frame *dx_probe(const struct qstr *d_name, | 252 | static struct dx_frame *dx_probe(const struct qstr *d_name, |
| 254 | struct inode *dir, | 253 | struct inode *dir, |
| 255 | struct dx_hash_info *hinfo, | 254 | struct dx_hash_info *hinfo, |
| 256 | struct dx_frame *frame, | 255 | struct dx_frame *frame); |
| 257 | int *err); | ||
| 258 | static void dx_release(struct dx_frame *frames); | 256 | static void dx_release(struct dx_frame *frames); |
| 259 | static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, | 257 | static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, |
| 260 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 258 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
| @@ -270,8 +268,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
| 270 | __u32 *start_hash); | 268 | __u32 *start_hash); |
| 271 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, | 269 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, |
| 272 | const struct qstr *d_name, | 270 | const struct qstr *d_name, |
| 273 | struct ext4_dir_entry_2 **res_dir, | 271 | struct ext4_dir_entry_2 **res_dir); |
| 274 | int *err); | ||
| 275 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 272 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
| 276 | struct inode *inode); | 273 | struct inode *inode); |
| 277 | 274 | ||
| @@ -340,8 +337,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) | |||
| 340 | { | 337 | { |
| 341 | struct ext4_dir_entry_tail *t; | 338 | struct ext4_dir_entry_tail *t; |
| 342 | 339 | ||
| 343 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 340 | if (!ext4_has_metadata_csum(inode->i_sb)) |
| 344 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 345 | return 1; | 341 | return 1; |
| 346 | 342 | ||
| 347 | t = get_dirent_tail(inode, dirent); | 343 | t = get_dirent_tail(inode, dirent); |
| @@ -362,8 +358,7 @@ static void ext4_dirent_csum_set(struct inode *inode, | |||
| 362 | { | 358 | { |
| 363 | struct ext4_dir_entry_tail *t; | 359 | struct ext4_dir_entry_tail *t; |
| 364 | 360 | ||
| 365 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 361 | if (!ext4_has_metadata_csum(inode->i_sb)) |
| 366 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 367 | return; | 362 | return; |
| 368 | 363 | ||
| 369 | t = get_dirent_tail(inode, dirent); | 364 | t = get_dirent_tail(inode, dirent); |
| @@ -438,8 +433,7 @@ static int ext4_dx_csum_verify(struct inode *inode, | |||
| 438 | struct dx_tail *t; | 433 | struct dx_tail *t; |
| 439 | int count_offset, limit, count; | 434 | int count_offset, limit, count; |
| 440 | 435 | ||
| 441 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 436 | if (!ext4_has_metadata_csum(inode->i_sb)) |
| 442 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 443 | return 1; | 437 | return 1; |
| 444 | 438 | ||
| 445 | c = get_dx_countlimit(inode, dirent, &count_offset); | 439 | c = get_dx_countlimit(inode, dirent, &count_offset); |
| @@ -468,8 +462,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) | |||
| 468 | struct dx_tail *t; | 462 | struct dx_tail *t; |
| 469 | int count_offset, limit, count; | 463 | int count_offset, limit, count; |
| 470 | 464 | ||
| 471 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 465 | if (!ext4_has_metadata_csum(inode->i_sb)) |
| 472 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 473 | return; | 466 | return; |
| 474 | 467 | ||
| 475 | c = get_dx_countlimit(inode, dirent, &count_offset); | 468 | c = get_dx_countlimit(inode, dirent, &count_offset); |
| @@ -557,8 +550,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) | |||
| 557 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 550 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
| 558 | EXT4_DIR_REC_LEN(2) - infosize; | 551 | EXT4_DIR_REC_LEN(2) - infosize; |
| 559 | 552 | ||
| 560 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 553 | if (ext4_has_metadata_csum(dir->i_sb)) |
| 561 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 562 | entry_space -= sizeof(struct dx_tail); | 554 | entry_space -= sizeof(struct dx_tail); |
| 563 | return entry_space / sizeof(struct dx_entry); | 555 | return entry_space / sizeof(struct dx_entry); |
| 564 | } | 556 | } |
| @@ -567,8 +559,7 @@ static inline unsigned dx_node_limit(struct inode *dir) | |||
| 567 | { | 559 | { |
| 568 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 560 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
| 569 | 561 | ||
| 570 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 562 | if (ext4_has_metadata_csum(dir->i_sb)) |
| 571 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 572 | entry_space -= sizeof(struct dx_tail); | 563 | entry_space -= sizeof(struct dx_tail); |
| 573 | return entry_space / sizeof(struct dx_entry); | 564 | return entry_space / sizeof(struct dx_entry); |
| 574 | } | 565 | } |
| @@ -641,7 +632,9 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
| 641 | u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; | 632 | u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; |
| 642 | struct stats stats; | 633 | struct stats stats; |
| 643 | printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); | 634 | printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); |
| 644 | if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; | 635 | bh = ext4_bread(NULL,dir, block, 0); |
| 636 | if (!bh || IS_ERR(bh)) | ||
| 637 | continue; | ||
| 645 | stats = levels? | 638 | stats = levels? |
| 646 | dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): | 639 | dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): |
| 647 | dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); | 640 | dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); |
| @@ -669,29 +662,25 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
| 669 | */ | 662 | */ |
| 670 | static struct dx_frame * | 663 | static struct dx_frame * |
| 671 | dx_probe(const struct qstr *d_name, struct inode *dir, | 664 | dx_probe(const struct qstr *d_name, struct inode *dir, |
| 672 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 665 | struct dx_hash_info *hinfo, struct dx_frame *frame_in) |
| 673 | { | 666 | { |
| 674 | unsigned count, indirect; | 667 | unsigned count, indirect; |
| 675 | struct dx_entry *at, *entries, *p, *q, *m; | 668 | struct dx_entry *at, *entries, *p, *q, *m; |
| 676 | struct dx_root *root; | 669 | struct dx_root *root; |
| 677 | struct buffer_head *bh; | ||
| 678 | struct dx_frame *frame = frame_in; | 670 | struct dx_frame *frame = frame_in; |
| 671 | struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); | ||
| 679 | u32 hash; | 672 | u32 hash; |
| 680 | 673 | ||
| 681 | frame->bh = NULL; | 674 | frame->bh = ext4_read_dirblock(dir, 0, INDEX); |
| 682 | bh = ext4_read_dirblock(dir, 0, INDEX); | 675 | if (IS_ERR(frame->bh)) |
| 683 | if (IS_ERR(bh)) { | 676 | return (struct dx_frame *) frame->bh; |
| 684 | *err = PTR_ERR(bh); | 677 | |
| 685 | goto fail; | 678 | root = (struct dx_root *) frame->bh->b_data; |
| 686 | } | ||
| 687 | root = (struct dx_root *) bh->b_data; | ||
| 688 | if (root->info.hash_version != DX_HASH_TEA && | 679 | if (root->info.hash_version != DX_HASH_TEA && |
| 689 | root->info.hash_version != DX_HASH_HALF_MD4 && | 680 | root->info.hash_version != DX_HASH_HALF_MD4 && |
| 690 | root->info.hash_version != DX_HASH_LEGACY) { | 681 | root->info.hash_version != DX_HASH_LEGACY) { |
| 691 | ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", | 682 | ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", |
| 692 | root->info.hash_version); | 683 | root->info.hash_version); |
| 693 | brelse(bh); | ||
| 694 | *err = ERR_BAD_DX_DIR; | ||
| 695 | goto fail; | 684 | goto fail; |
| 696 | } | 685 | } |
| 697 | hinfo->hash_version = root->info.hash_version; | 686 | hinfo->hash_version = root->info.hash_version; |
| @@ -705,16 +694,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
| 705 | if (root->info.unused_flags & 1) { | 694 | if (root->info.unused_flags & 1) { |
| 706 | ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", | 695 | ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", |
| 707 | root->info.unused_flags); | 696 | root->info.unused_flags); |
| 708 | brelse(bh); | ||
| 709 | *err = ERR_BAD_DX_DIR; | ||
| 710 | goto fail; | 697 | goto fail; |
| 711 | } | 698 | } |
| 712 | 699 | ||
| 713 | if ((indirect = root->info.indirect_levels) > 1) { | 700 | if ((indirect = root->info.indirect_levels) > 1) { |
| 714 | ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", | 701 | ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", |
| 715 | root->info.indirect_levels); | 702 | root->info.indirect_levels); |
| 716 | brelse(bh); | ||
| 717 | *err = ERR_BAD_DX_DIR; | ||
| 718 | goto fail; | 703 | goto fail; |
| 719 | } | 704 | } |
| 720 | 705 | ||
| @@ -724,27 +709,21 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
| 724 | if (dx_get_limit(entries) != dx_root_limit(dir, | 709 | if (dx_get_limit(entries) != dx_root_limit(dir, |
| 725 | root->info.info_length)) { | 710 | root->info.info_length)) { |
| 726 | ext4_warning(dir->i_sb, "dx entry: limit != root limit"); | 711 | ext4_warning(dir->i_sb, "dx entry: limit != root limit"); |
| 727 | brelse(bh); | ||
| 728 | *err = ERR_BAD_DX_DIR; | ||
| 729 | goto fail; | 712 | goto fail; |
| 730 | } | 713 | } |
| 731 | 714 | ||
| 732 | dxtrace(printk("Look up %x", hash)); | 715 | dxtrace(printk("Look up %x", hash)); |
| 733 | while (1) | 716 | while (1) { |
| 734 | { | ||
| 735 | count = dx_get_count(entries); | 717 | count = dx_get_count(entries); |
| 736 | if (!count || count > dx_get_limit(entries)) { | 718 | if (!count || count > dx_get_limit(entries)) { |
| 737 | ext4_warning(dir->i_sb, | 719 | ext4_warning(dir->i_sb, |
| 738 | "dx entry: no count or count > limit"); | 720 | "dx entry: no count or count > limit"); |
| 739 | brelse(bh); | 721 | goto fail; |
| 740 | *err = ERR_BAD_DX_DIR; | ||
| 741 | goto fail2; | ||
| 742 | } | 722 | } |
| 743 | 723 | ||
| 744 | p = entries + 1; | 724 | p = entries + 1; |
| 745 | q = entries + count - 1; | 725 | q = entries + count - 1; |
| 746 | while (p <= q) | 726 | while (p <= q) { |
| 747 | { | ||
| 748 | m = p + (q - p)/2; | 727 | m = p + (q - p)/2; |
| 749 | dxtrace(printk(".")); | 728 | dxtrace(printk(".")); |
| 750 | if (dx_get_hash(m) > hash) | 729 | if (dx_get_hash(m) > hash) |
| @@ -753,8 +732,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
| 753 | p = m + 1; | 732 | p = m + 1; |
| 754 | } | 733 | } |
| 755 | 734 | ||
| 756 | if (0) // linear search cross check | 735 | if (0) { // linear search cross check |
| 757 | { | ||
| 758 | unsigned n = count - 1; | 736 | unsigned n = count - 1; |
| 759 | at = entries; | 737 | at = entries; |
| 760 | while (n--) | 738 | while (n--) |
| @@ -771,38 +749,35 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
| 771 | 749 | ||
| 772 | at = p - 1; | 750 | at = p - 1; |
| 773 | dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); | 751 | dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); |
| 774 | frame->bh = bh; | ||
| 775 | frame->entries = entries; | 752 | frame->entries = entries; |
| 776 | frame->at = at; | 753 | frame->at = at; |
| 777 | if (!indirect--) return frame; | 754 | if (!indirect--) |
| 778 | bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); | 755 | return frame; |
| 779 | if (IS_ERR(bh)) { | 756 | frame++; |
| 780 | *err = PTR_ERR(bh); | 757 | frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); |
| 781 | goto fail2; | 758 | if (IS_ERR(frame->bh)) { |
| 759 | ret_err = (struct dx_frame *) frame->bh; | ||
| 760 | frame->bh = NULL; | ||
| 761 | goto fail; | ||
| 782 | } | 762 | } |
| 783 | entries = ((struct dx_node *) bh->b_data)->entries; | 763 | entries = ((struct dx_node *) frame->bh->b_data)->entries; |
| 784 | 764 | ||
| 785 | if (dx_get_limit(entries) != dx_node_limit (dir)) { | 765 | if (dx_get_limit(entries) != dx_node_limit (dir)) { |
| 786 | ext4_warning(dir->i_sb, | 766 | ext4_warning(dir->i_sb, |
| 787 | "dx entry: limit != node limit"); | 767 | "dx entry: limit != node limit"); |
| 788 | brelse(bh); | 768 | goto fail; |
| 789 | *err = ERR_BAD_DX_DIR; | ||
| 790 | goto fail2; | ||
| 791 | } | 769 | } |
| 792 | frame++; | ||
| 793 | frame->bh = NULL; | ||
| 794 | } | 770 | } |
| 795 | fail2: | 771 | fail: |
| 796 | while (frame >= frame_in) { | 772 | while (frame >= frame_in) { |
| 797 | brelse(frame->bh); | 773 | brelse(frame->bh); |
| 798 | frame--; | 774 | frame--; |
| 799 | } | 775 | } |
| 800 | fail: | 776 | if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) |
| 801 | if (*err == ERR_BAD_DX_DIR) | ||
| 802 | ext4_warning(dir->i_sb, | 777 | ext4_warning(dir->i_sb, |
| 803 | "Corrupt dir inode %lu, running e2fsck is " | 778 | "Corrupt dir inode %lu, running e2fsck is " |
| 804 | "recommended.", dir->i_ino); | 779 | "recommended.", dir->i_ino); |
| 805 | return NULL; | 780 | return ret_err; |
| 806 | } | 781 | } |
| 807 | 782 | ||
| 808 | static void dx_release (struct dx_frame *frames) | 783 | static void dx_release (struct dx_frame *frames) |
| @@ -988,9 +963,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
| 988 | } | 963 | } |
| 989 | hinfo.hash = start_hash; | 964 | hinfo.hash = start_hash; |
| 990 | hinfo.minor_hash = 0; | 965 | hinfo.minor_hash = 0; |
| 991 | frame = dx_probe(NULL, dir, &hinfo, frames, &err); | 966 | frame = dx_probe(NULL, dir, &hinfo, frames); |
| 992 | if (!frame) | 967 | if (IS_ERR(frame)) |
| 993 | return err; | 968 | return PTR_ERR(frame); |
| 994 | 969 | ||
| 995 | /* Add '.' and '..' from the htree header */ | 970 | /* Add '.' and '..' from the htree header */ |
| 996 | if (!start_hash && !start_minor_hash) { | 971 | if (!start_hash && !start_minor_hash) { |
| @@ -1227,8 +1202,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
| 1227 | buffer */ | 1202 | buffer */ |
| 1228 | int num = 0; | 1203 | int num = 0; |
| 1229 | ext4_lblk_t nblocks; | 1204 | ext4_lblk_t nblocks; |
| 1230 | int i, err = 0; | 1205 | int i, namelen; |
| 1231 | int namelen; | ||
| 1232 | 1206 | ||
| 1233 | *res_dir = NULL; | 1207 | *res_dir = NULL; |
| 1234 | sb = dir->i_sb; | 1208 | sb = dir->i_sb; |
| @@ -1258,17 +1232,13 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
| 1258 | goto restart; | 1232 | goto restart; |
| 1259 | } | 1233 | } |
| 1260 | if (is_dx(dir)) { | 1234 | if (is_dx(dir)) { |
| 1261 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); | 1235 | bh = ext4_dx_find_entry(dir, d_name, res_dir); |
| 1262 | /* | 1236 | /* |
| 1263 | * On success, or if the error was file not found, | 1237 | * On success, or if the error was file not found, |
| 1264 | * return. Otherwise, fall back to doing a search the | 1238 | * return. Otherwise, fall back to doing a search the |
| 1265 | * old fashioned way. | 1239 | * old fashioned way. |
| 1266 | */ | 1240 | */ |
| 1267 | if (err == -ENOENT) | 1241 | if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR) |
| 1268 | return NULL; | ||
| 1269 | if (err && err != ERR_BAD_DX_DIR) | ||
| 1270 | return ERR_PTR(err); | ||
| 1271 | if (bh) | ||
| 1272 | return bh; | 1242 | return bh; |
| 1273 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " | 1243 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
| 1274 | "falling back\n")); | 1244 | "falling back\n")); |
| @@ -1298,10 +1268,10 @@ restart: | |||
| 1298 | break; | 1268 | break; |
| 1299 | } | 1269 | } |
| 1300 | num++; | 1270 | num++; |
| 1301 | bh = ext4_getblk(NULL, dir, b++, 0, &err); | 1271 | bh = ext4_getblk(NULL, dir, b++, 0); |
| 1302 | if (unlikely(err)) { | 1272 | if (unlikely(IS_ERR(bh))) { |
| 1303 | if (ra_max == 0) | 1273 | if (ra_max == 0) |
| 1304 | return ERR_PTR(err); | 1274 | return bh; |
| 1305 | break; | 1275 | break; |
| 1306 | } | 1276 | } |
| 1307 | bh_use[ra_max] = bh; | 1277 | bh_use[ra_max] = bh; |
| @@ -1366,7 +1336,7 @@ cleanup_and_exit: | |||
| 1366 | } | 1336 | } |
| 1367 | 1337 | ||
| 1368 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, | 1338 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
| 1369 | struct ext4_dir_entry_2 **res_dir, int *err) | 1339 | struct ext4_dir_entry_2 **res_dir) |
| 1370 | { | 1340 | { |
| 1371 | struct super_block * sb = dir->i_sb; | 1341 | struct super_block * sb = dir->i_sb; |
| 1372 | struct dx_hash_info hinfo; | 1342 | struct dx_hash_info hinfo; |
| @@ -1375,25 +1345,23 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
| 1375 | ext4_lblk_t block; | 1345 | ext4_lblk_t block; |
| 1376 | int retval; | 1346 | int retval; |
| 1377 | 1347 | ||
| 1378 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) | 1348 | frame = dx_probe(d_name, dir, &hinfo, frames); |
| 1379 | return NULL; | 1349 | if (IS_ERR(frame)) |
| 1350 | return (struct buffer_head *) frame; | ||
| 1380 | do { | 1351 | do { |
| 1381 | block = dx_get_block(frame->at); | 1352 | block = dx_get_block(frame->at); |
| 1382 | bh = ext4_read_dirblock(dir, block, DIRENT); | 1353 | bh = ext4_read_dirblock(dir, block, DIRENT); |
| 1383 | if (IS_ERR(bh)) { | 1354 | if (IS_ERR(bh)) |
| 1384 | *err = PTR_ERR(bh); | ||
| 1385 | goto errout; | 1355 | goto errout; |
| 1386 | } | 1356 | |
| 1387 | retval = search_dirblock(bh, dir, d_name, | 1357 | retval = search_dirblock(bh, dir, d_name, |
| 1388 | block << EXT4_BLOCK_SIZE_BITS(sb), | 1358 | block << EXT4_BLOCK_SIZE_BITS(sb), |
| 1389 | res_dir); | 1359 | res_dir); |
| 1390 | if (retval == 1) { /* Success! */ | 1360 | if (retval == 1) |
| 1391 | dx_release(frames); | 1361 | goto success; |
| 1392 | return bh; | ||
| 1393 | } | ||
| 1394 | brelse(bh); | 1362 | brelse(bh); |
| 1395 | if (retval == -1) { | 1363 | if (retval == -1) { |
| 1396 | *err = ERR_BAD_DX_DIR; | 1364 | bh = ERR_PTR(ERR_BAD_DX_DIR); |
| 1397 | goto errout; | 1365 | goto errout; |
| 1398 | } | 1366 | } |
| 1399 | 1367 | ||
| @@ -1402,18 +1370,19 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
| 1402 | frames, NULL); | 1370 | frames, NULL); |
| 1403 | if (retval < 0) { | 1371 | if (retval < 0) { |
| 1404 | ext4_warning(sb, | 1372 | ext4_warning(sb, |
| 1405 | "error reading index page in directory #%lu", | 1373 | "error %d reading index page in directory #%lu", |
| 1406 | dir->i_ino); | 1374 | retval, dir->i_ino); |
| 1407 | *err = retval; | 1375 | bh = ERR_PTR(retval); |
| 1408 | goto errout; | 1376 | goto errout; |
| 1409 | } | 1377 | } |
| 1410 | } while (retval == 1); | 1378 | } while (retval == 1); |
| 1411 | 1379 | ||
| 1412 | *err = -ENOENT; | 1380 | bh = NULL; |
| 1413 | errout: | 1381 | errout: |
| 1414 | dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); | 1382 | dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); |
| 1415 | dx_release (frames); | 1383 | success: |
| 1416 | return NULL; | 1384 | dx_release(frames); |
| 1385 | return bh; | ||
| 1417 | } | 1386 | } |
| 1418 | 1387 | ||
| 1419 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 1388 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
| @@ -1441,7 +1410,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi | |||
| 1441 | dentry); | 1410 | dentry); |
| 1442 | return ERR_PTR(-EIO); | 1411 | return ERR_PTR(-EIO); |
| 1443 | } | 1412 | } |
| 1444 | inode = ext4_iget(dir->i_sb, ino); | 1413 | inode = ext4_iget_normal(dir->i_sb, ino); |
| 1445 | if (inode == ERR_PTR(-ESTALE)) { | 1414 | if (inode == ERR_PTR(-ESTALE)) { |
| 1446 | EXT4_ERROR_INODE(dir, | 1415 | EXT4_ERROR_INODE(dir, |
| 1447 | "deleted inode referenced: %u", | 1416 | "deleted inode referenced: %u", |
| @@ -1474,7 +1443,7 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
| 1474 | return ERR_PTR(-EIO); | 1443 | return ERR_PTR(-EIO); |
| 1475 | } | 1444 | } |
| 1476 | 1445 | ||
| 1477 | return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); | 1446 | return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino)); |
| 1478 | } | 1447 | } |
| 1479 | 1448 | ||
| 1480 | /* | 1449 | /* |
| @@ -1533,7 +1502,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) | |||
| 1533 | */ | 1502 | */ |
| 1534 | static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | 1503 | static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, |
| 1535 | struct buffer_head **bh,struct dx_frame *frame, | 1504 | struct buffer_head **bh,struct dx_frame *frame, |
| 1536 | struct dx_hash_info *hinfo, int *error) | 1505 | struct dx_hash_info *hinfo) |
| 1537 | { | 1506 | { |
| 1538 | unsigned blocksize = dir->i_sb->s_blocksize; | 1507 | unsigned blocksize = dir->i_sb->s_blocksize; |
| 1539 | unsigned count, continued; | 1508 | unsigned count, continued; |
| @@ -1548,16 +1517,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
| 1548 | int csum_size = 0; | 1517 | int csum_size = 0; |
| 1549 | int err = 0, i; | 1518 | int err = 0, i; |
| 1550 | 1519 | ||
| 1551 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 1520 | if (ext4_has_metadata_csum(dir->i_sb)) |
| 1552 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 1553 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1521 | csum_size = sizeof(struct ext4_dir_entry_tail); |
| 1554 | 1522 | ||
| 1555 | bh2 = ext4_append(handle, dir, &newblock); | 1523 | bh2 = ext4_append(handle, dir, &newblock); |
| 1556 | if (IS_ERR(bh2)) { | 1524 | if (IS_ERR(bh2)) { |
| 1557 | brelse(*bh); | 1525 | brelse(*bh); |
| 1558 | *bh = NULL; | 1526 | *bh = NULL; |
| 1559 | *error = PTR_ERR(bh2); | 1527 | return (struct ext4_dir_entry_2 *) bh2; |
| 1560 | return NULL; | ||
| 1561 | } | 1528 | } |
| 1562 | 1529 | ||
| 1563 | BUFFER_TRACE(*bh, "get_write_access"); | 1530 | BUFFER_TRACE(*bh, "get_write_access"); |
| @@ -1617,8 +1584,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
| 1617 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); | 1584 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); |
| 1618 | 1585 | ||
| 1619 | /* Which block gets the new entry? */ | 1586 | /* Which block gets the new entry? */ |
| 1620 | if (hinfo->hash >= hash2) | 1587 | if (hinfo->hash >= hash2) { |
| 1621 | { | ||
| 1622 | swap(*bh, bh2); | 1588 | swap(*bh, bh2); |
| 1623 | de = de2; | 1589 | de = de2; |
| 1624 | } | 1590 | } |
| @@ -1638,8 +1604,7 @@ journal_error: | |||
| 1638 | brelse(bh2); | 1604 | brelse(bh2); |
| 1639 | *bh = NULL; | 1605 | *bh = NULL; |
| 1640 | ext4_std_error(dir->i_sb, err); | 1606 | ext4_std_error(dir->i_sb, err); |
| 1641 | *error = err; | 1607 | return ERR_PTR(err); |
| 1642 | return NULL; | ||
| 1643 | } | 1608 | } |
| 1644 | 1609 | ||
| 1645 | int ext4_find_dest_de(struct inode *dir, struct inode *inode, | 1610 | int ext4_find_dest_de(struct inode *dir, struct inode *inode, |
| @@ -1718,8 +1683,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
| 1718 | int csum_size = 0; | 1683 | int csum_size = 0; |
| 1719 | int err; | 1684 | int err; |
| 1720 | 1685 | ||
| 1721 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1686 | if (ext4_has_metadata_csum(inode->i_sb)) |
| 1722 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 1723 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1687 | csum_size = sizeof(struct ext4_dir_entry_tail); |
| 1724 | 1688 | ||
| 1725 | if (!de) { | 1689 | if (!de) { |
| @@ -1786,8 +1750,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1786 | struct fake_dirent *fde; | 1750 | struct fake_dirent *fde; |
| 1787 | int csum_size = 0; | 1751 | int csum_size = 0; |
| 1788 | 1752 | ||
| 1789 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1753 | if (ext4_has_metadata_csum(inode->i_sb)) |
| 1790 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 1791 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1754 | csum_size = sizeof(struct ext4_dir_entry_tail); |
| 1792 | 1755 | ||
| 1793 | blocksize = dir->i_sb->s_blocksize; | 1756 | blocksize = dir->i_sb->s_blocksize; |
| @@ -1853,31 +1816,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1853 | hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; | 1816 | hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; |
| 1854 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 1817 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
| 1855 | ext4fs_dirhash(name, namelen, &hinfo); | 1818 | ext4fs_dirhash(name, namelen, &hinfo); |
| 1819 | memset(frames, 0, sizeof(frames)); | ||
| 1856 | frame = frames; | 1820 | frame = frames; |
| 1857 | frame->entries = entries; | 1821 | frame->entries = entries; |
| 1858 | frame->at = entries; | 1822 | frame->at = entries; |
| 1859 | frame->bh = bh; | 1823 | frame->bh = bh; |
| 1860 | bh = bh2; | 1824 | bh = bh2; |
| 1861 | 1825 | ||
| 1862 | ext4_handle_dirty_dx_node(handle, dir, frame->bh); | 1826 | retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); |
| 1863 | ext4_handle_dirty_dirent_node(handle, dir, bh); | 1827 | if (retval) |
| 1828 | goto out_frames; | ||
| 1829 | retval = ext4_handle_dirty_dirent_node(handle, dir, bh); | ||
| 1830 | if (retval) | ||
| 1831 | goto out_frames; | ||
| 1864 | 1832 | ||
| 1865 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); | 1833 | de = do_split(handle,dir, &bh, frame, &hinfo); |
| 1866 | if (!de) { | 1834 | if (IS_ERR(de)) { |
| 1867 | /* | 1835 | retval = PTR_ERR(de); |
| 1868 | * Even if the block split failed, we have to properly write | 1836 | goto out_frames; |
| 1869 | * out all the changes we did so far. Otherwise we can end up | ||
| 1870 | * with corrupted filesystem. | ||
| 1871 | */ | ||
| 1872 | ext4_mark_inode_dirty(handle, dir); | ||
| 1873 | dx_release(frames); | ||
| 1874 | return retval; | ||
| 1875 | } | 1837 | } |
| 1876 | dx_release(frames); | 1838 | dx_release(frames); |
| 1877 | 1839 | ||
| 1878 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1840 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
| 1879 | brelse(bh); | 1841 | brelse(bh); |
| 1880 | return retval; | 1842 | return retval; |
| 1843 | out_frames: | ||
| 1844 | /* | ||
| 1845 | * Even if the block split failed, we have to properly write | ||
| 1846 | * out all the changes we did so far. Otherwise we can end up | ||
| 1847 | * with corrupted filesystem. | ||
| 1848 | */ | ||
| 1849 | ext4_mark_inode_dirty(handle, dir); | ||
| 1850 | dx_release(frames); | ||
| 1851 | return retval; | ||
| 1881 | } | 1852 | } |
| 1882 | 1853 | ||
| 1883 | /* | 1854 | /* |
| @@ -1904,8 +1875,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1904 | ext4_lblk_t block, blocks; | 1875 | ext4_lblk_t block, blocks; |
| 1905 | int csum_size = 0; | 1876 | int csum_size = 0; |
| 1906 | 1877 | ||
| 1907 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 1878 | if (ext4_has_metadata_csum(inode->i_sb)) |
| 1908 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 1909 | csum_size = sizeof(struct ext4_dir_entry_tail); | 1879 | csum_size = sizeof(struct ext4_dir_entry_tail); |
| 1910 | 1880 | ||
| 1911 | sb = dir->i_sb; | 1881 | sb = dir->i_sb; |
| @@ -1982,9 +1952,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1982 | struct ext4_dir_entry_2 *de; | 1952 | struct ext4_dir_entry_2 *de; |
| 1983 | int err; | 1953 | int err; |
| 1984 | 1954 | ||
| 1985 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); | 1955 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames); |
| 1986 | if (!frame) | 1956 | if (IS_ERR(frame)) |
| 1987 | return err; | 1957 | return PTR_ERR(frame); |
| 1988 | entries = frame->entries; | 1958 | entries = frame->entries; |
| 1989 | at = frame->at; | 1959 | at = frame->at; |
| 1990 | bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); | 1960 | bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); |
| @@ -2095,9 +2065,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 2095 | goto cleanup; | 2065 | goto cleanup; |
| 2096 | } | 2066 | } |
| 2097 | } | 2067 | } |
| 2098 | de = do_split(handle, dir, &bh, frame, &hinfo, &err); | 2068 | de = do_split(handle, dir, &bh, frame, &hinfo); |
| 2099 | if (!de) | 2069 | if (IS_ERR(de)) { |
| 2070 | err = PTR_ERR(de); | ||
| 2100 | goto cleanup; | 2071 | goto cleanup; |
| 2072 | } | ||
| 2101 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); | 2073 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); |
| 2102 | goto cleanup; | 2074 | goto cleanup; |
| 2103 | 2075 | ||
| @@ -2167,8 +2139,7 @@ static int ext4_delete_entry(handle_t *handle, | |||
| 2167 | return err; | 2139 | return err; |
| 2168 | } | 2140 | } |
| 2169 | 2141 | ||
| 2170 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 2142 | if (ext4_has_metadata_csum(dir->i_sb)) |
| 2171 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 2172 | csum_size = sizeof(struct ext4_dir_entry_tail); | 2143 | csum_size = sizeof(struct ext4_dir_entry_tail); |
| 2173 | 2144 | ||
| 2174 | BUFFER_TRACE(bh, "get_write_access"); | 2145 | BUFFER_TRACE(bh, "get_write_access"); |
| @@ -2387,8 +2358,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, | |||
| 2387 | int csum_size = 0; | 2358 | int csum_size = 0; |
| 2388 | int err; | 2359 | int err; |
| 2389 | 2360 | ||
| 2390 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | 2361 | if (ext4_has_metadata_csum(dir->i_sb)) |
| 2391 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 2392 | csum_size = sizeof(struct ext4_dir_entry_tail); | 2362 | csum_size = sizeof(struct ext4_dir_entry_tail); |
| 2393 | 2363 | ||
| 2394 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { | 2364 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { |
| @@ -2403,10 +2373,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, | |||
| 2403 | dir_block = ext4_append(handle, inode, &block); | 2373 | dir_block = ext4_append(handle, inode, &block); |
| 2404 | if (IS_ERR(dir_block)) | 2374 | if (IS_ERR(dir_block)) |
| 2405 | return PTR_ERR(dir_block); | 2375 | return PTR_ERR(dir_block); |
| 2406 | BUFFER_TRACE(dir_block, "get_write_access"); | ||
| 2407 | err = ext4_journal_get_write_access(handle, dir_block); | ||
| 2408 | if (err) | ||
| 2409 | goto out; | ||
| 2410 | de = (struct ext4_dir_entry_2 *)dir_block->b_data; | 2376 | de = (struct ext4_dir_entry_2 *)dir_block->b_data; |
| 2411 | ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); | 2377 | ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); |
| 2412 | set_nlink(inode, 2); | 2378 | set_nlink(inode, 2); |
| @@ -2573,7 +2539,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
| 2573 | int err = 0, rc; | 2539 | int err = 0, rc; |
| 2574 | bool dirty = false; | 2540 | bool dirty = false; |
| 2575 | 2541 | ||
| 2576 | if (!sbi->s_journal) | 2542 | if (!sbi->s_journal || is_bad_inode(inode)) |
| 2577 | return 0; | 2543 | return 0; |
| 2578 | 2544 | ||
| 2579 | WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && | 2545 | WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && |
| @@ -3190,6 +3156,39 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
| 3190 | } | 3156 | } |
| 3191 | } | 3157 | } |
| 3192 | 3158 | ||
| 3159 | static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent, | ||
| 3160 | int credits, handle_t **h) | ||
| 3161 | { | ||
| 3162 | struct inode *wh; | ||
| 3163 | handle_t *handle; | ||
| 3164 | int retries = 0; | ||
| 3165 | |||
| 3166 | /* | ||
| 3167 | * for inode block, sb block, group summaries, | ||
| 3168 | * and inode bitmap | ||
| 3169 | */ | ||
| 3170 | credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) + | ||
| 3171 | EXT4_XATTR_TRANS_BLOCKS + 4); | ||
| 3172 | retry: | ||
| 3173 | wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE, | ||
| 3174 | &ent->dentry->d_name, 0, NULL, | ||
| 3175 | EXT4_HT_DIR, credits); | ||
| 3176 | |||
| 3177 | handle = ext4_journal_current_handle(); | ||
| 3178 | if (IS_ERR(wh)) { | ||
| 3179 | if (handle) | ||
| 3180 | ext4_journal_stop(handle); | ||
| 3181 | if (PTR_ERR(wh) == -ENOSPC && | ||
| 3182 | ext4_should_retry_alloc(ent->dir->i_sb, &retries)) | ||
| 3183 | goto retry; | ||
| 3184 | } else { | ||
| 3185 | *h = handle; | ||
| 3186 | init_special_inode(wh, wh->i_mode, WHITEOUT_DEV); | ||
| 3187 | wh->i_op = &ext4_special_inode_operations; | ||
| 3188 | } | ||
| 3189 | return wh; | ||
| 3190 | } | ||
| 3191 | |||
| 3193 | /* | 3192 | /* |
| 3194 | * Anybody can rename anything with this: the permission checks are left to the | 3193 | * Anybody can rename anything with this: the permission checks are left to the |
| 3195 | * higher-level routines. | 3194 | * higher-level routines. |
| @@ -3199,7 +3198,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | |||
| 3199 | * This comes from rename(const char *oldpath, const char *newpath) | 3198 | * This comes from rename(const char *oldpath, const char *newpath) |
| 3200 | */ | 3199 | */ |
| 3201 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | 3200 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 3202 | struct inode *new_dir, struct dentry *new_dentry) | 3201 | struct inode *new_dir, struct dentry *new_dentry, |
| 3202 | unsigned int flags) | ||
| 3203 | { | 3203 | { |
| 3204 | handle_t *handle = NULL; | 3204 | handle_t *handle = NULL; |
| 3205 | struct ext4_renament old = { | 3205 | struct ext4_renament old = { |
| @@ -3214,6 +3214,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3214 | }; | 3214 | }; |
| 3215 | int force_reread; | 3215 | int force_reread; |
| 3216 | int retval; | 3216 | int retval; |
| 3217 | struct inode *whiteout = NULL; | ||
| 3218 | int credits; | ||
| 3219 | u8 old_file_type; | ||
| 3217 | 3220 | ||
| 3218 | dquot_initialize(old.dir); | 3221 | dquot_initialize(old.dir); |
| 3219 | dquot_initialize(new.dir); | 3222 | dquot_initialize(new.dir); |
| @@ -3252,11 +3255,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3252 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) | 3255 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) |
| 3253 | ext4_alloc_da_blocks(old.inode); | 3256 | ext4_alloc_da_blocks(old.inode); |
| 3254 | 3257 | ||
| 3255 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, | 3258 | credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + |
| 3256 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + | 3259 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); |
| 3257 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | 3260 | if (!(flags & RENAME_WHITEOUT)) { |
| 3258 | if (IS_ERR(handle)) | 3261 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); |
| 3259 | return PTR_ERR(handle); | 3262 | if (IS_ERR(handle)) |
| 3263 | return PTR_ERR(handle); | ||
| 3264 | } else { | ||
| 3265 | whiteout = ext4_whiteout_for_rename(&old, credits, &handle); | ||
| 3266 | if (IS_ERR(whiteout)) | ||
| 3267 | return PTR_ERR(whiteout); | ||
| 3268 | } | ||
| 3260 | 3269 | ||
| 3261 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | 3270 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
| 3262 | ext4_handle_sync(handle); | 3271 | ext4_handle_sync(handle); |
| @@ -3284,13 +3293,26 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3284 | */ | 3293 | */ |
| 3285 | force_reread = (new.dir->i_ino == old.dir->i_ino && | 3294 | force_reread = (new.dir->i_ino == old.dir->i_ino && |
| 3286 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); | 3295 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); |
| 3296 | |||
| 3297 | old_file_type = old.de->file_type; | ||
| 3298 | if (whiteout) { | ||
| 3299 | /* | ||
| 3300 | * Do this before adding a new entry, so the old entry is sure | ||
| 3301 | * to be still pointing to the valid old entry. | ||
| 3302 | */ | ||
| 3303 | retval = ext4_setent(handle, &old, whiteout->i_ino, | ||
| 3304 | EXT4_FT_CHRDEV); | ||
| 3305 | if (retval) | ||
| 3306 | goto end_rename; | ||
| 3307 | ext4_mark_inode_dirty(handle, whiteout); | ||
| 3308 | } | ||
| 3287 | if (!new.bh) { | 3309 | if (!new.bh) { |
| 3288 | retval = ext4_add_entry(handle, new.dentry, old.inode); | 3310 | retval = ext4_add_entry(handle, new.dentry, old.inode); |
| 3289 | if (retval) | 3311 | if (retval) |
| 3290 | goto end_rename; | 3312 | goto end_rename; |
| 3291 | } else { | 3313 | } else { |
| 3292 | retval = ext4_setent(handle, &new, | 3314 | retval = ext4_setent(handle, &new, |
| 3293 | old.inode->i_ino, old.de->file_type); | 3315 | old.inode->i_ino, old_file_type); |
| 3294 | if (retval) | 3316 | if (retval) |
| 3295 | goto end_rename; | 3317 | goto end_rename; |
| 3296 | } | 3318 | } |
| @@ -3305,10 +3327,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3305 | old.inode->i_ctime = ext4_current_time(old.inode); | 3327 | old.inode->i_ctime = ext4_current_time(old.inode); |
| 3306 | ext4_mark_inode_dirty(handle, old.inode); | 3328 | ext4_mark_inode_dirty(handle, old.inode); |
| 3307 | 3329 | ||
| 3308 | /* | 3330 | if (!whiteout) { |
| 3309 | * ok, that's it | 3331 | /* |
| 3310 | */ | 3332 | * ok, that's it |
| 3311 | ext4_rename_delete(handle, &old, force_reread); | 3333 | */ |
| 3334 | ext4_rename_delete(handle, &old, force_reread); | ||
| 3335 | } | ||
| 3312 | 3336 | ||
| 3313 | if (new.inode) { | 3337 | if (new.inode) { |
| 3314 | ext4_dec_count(handle, new.inode); | 3338 | ext4_dec_count(handle, new.inode); |
| @@ -3344,6 +3368,12 @@ end_rename: | |||
| 3344 | brelse(old.dir_bh); | 3368 | brelse(old.dir_bh); |
| 3345 | brelse(old.bh); | 3369 | brelse(old.bh); |
| 3346 | brelse(new.bh); | 3370 | brelse(new.bh); |
| 3371 | if (whiteout) { | ||
| 3372 | if (retval) | ||
| 3373 | drop_nlink(whiteout); | ||
| 3374 | unlock_new_inode(whiteout); | ||
| 3375 | iput(whiteout); | ||
| 3376 | } | ||
| 3347 | if (handle) | 3377 | if (handle) |
| 3348 | ext4_journal_stop(handle); | 3378 | ext4_journal_stop(handle); |
| 3349 | return retval; | 3379 | return retval; |
| @@ -3476,18 +3506,15 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
| 3476 | struct inode *new_dir, struct dentry *new_dentry, | 3506 | struct inode *new_dir, struct dentry *new_dentry, |
| 3477 | unsigned int flags) | 3507 | unsigned int flags) |
| 3478 | { | 3508 | { |
| 3479 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 3509 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
| 3480 | return -EINVAL; | 3510 | return -EINVAL; |
| 3481 | 3511 | ||
| 3482 | if (flags & RENAME_EXCHANGE) { | 3512 | if (flags & RENAME_EXCHANGE) { |
| 3483 | return ext4_cross_rename(old_dir, old_dentry, | 3513 | return ext4_cross_rename(old_dir, old_dentry, |
| 3484 | new_dir, new_dentry); | 3514 | new_dir, new_dentry); |
| 3485 | } | 3515 | } |
| 3486 | /* | 3516 | |
| 3487 | * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" | 3517 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags); |
| 3488 | * is equivalent to regular rename. | ||
| 3489 | */ | ||
| 3490 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
| 3491 | } | 3518 | } |
| 3492 | 3519 | ||
| 3493 | /* | 3520 | /* |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1e43b905ff98..ca4588388fc3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -1081,7 +1081,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, | |||
| 1081 | break; | 1081 | break; |
| 1082 | 1082 | ||
| 1083 | if (meta_bg == 0) | 1083 | if (meta_bg == 0) |
| 1084 | backup_block = group * bpg + blk_off; | 1084 | backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; |
| 1085 | else | 1085 | else |
| 1086 | backup_block = (ext4_group_first_block_no(sb, group) + | 1086 | backup_block = (ext4_group_first_block_no(sb, group) + |
| 1087 | ext4_bg_has_super(sb, group)); | 1087 | ext4_bg_has_super(sb, group)); |
| @@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb, | |||
| 1212 | { | 1212 | { |
| 1213 | struct buffer_head *bh; | 1213 | struct buffer_head *bh; |
| 1214 | 1214 | ||
| 1215 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1215 | if (!ext4_has_metadata_csum(sb)) |
| 1216 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 1217 | return 0; | 1216 | return 0; |
| 1218 | 1217 | ||
| 1219 | bh = ext4_get_bitmap(sb, group_data->inode_bitmap); | 1218 | bh = ext4_get_bitmap(sb, group_data->inode_bitmap); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 05c159218bc2..2c9e6864abd9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
| 70 | static void ext4_clear_journal_err(struct super_block *sb, | 70 | static void ext4_clear_journal_err(struct super_block *sb, |
| 71 | struct ext4_super_block *es); | 71 | struct ext4_super_block *es); |
| 72 | static int ext4_sync_fs(struct super_block *sb, int wait); | 72 | static int ext4_sync_fs(struct super_block *sb, int wait); |
| 73 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); | ||
| 74 | static int ext4_remount(struct super_block *sb, int *flags, char *data); | 73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); |
| 75 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | 74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); |
| 76 | static int ext4_unfreeze(struct super_block *sb); | 75 | static int ext4_unfreeze(struct super_block *sb); |
| @@ -141,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb, | |||
| 141 | static int ext4_superblock_csum_verify(struct super_block *sb, | 140 | static int ext4_superblock_csum_verify(struct super_block *sb, |
| 142 | struct ext4_super_block *es) | 141 | struct ext4_super_block *es) |
| 143 | { | 142 | { |
| 144 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 143 | if (!ext4_has_metadata_csum(sb)) |
| 145 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 146 | return 1; | 144 | return 1; |
| 147 | 145 | ||
| 148 | return es->s_checksum == ext4_superblock_csum(sb, es); | 146 | return es->s_checksum == ext4_superblock_csum(sb, es); |
| @@ -152,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb) | |||
| 152 | { | 150 | { |
| 153 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 151 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
| 154 | 152 | ||
| 155 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 153 | if (!ext4_has_metadata_csum(sb)) |
| 156 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 157 | return; | 154 | return; |
| 158 | 155 | ||
| 159 | es->s_checksum = ext4_superblock_csum(sb, es); | 156 | es->s_checksum = ext4_superblock_csum(sb, es); |
| @@ -820,10 +817,9 @@ static void ext4_put_super(struct super_block *sb) | |||
| 820 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 817 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| 821 | percpu_counter_destroy(&sbi->s_dirs_counter); | 818 | percpu_counter_destroy(&sbi->s_dirs_counter); |
| 822 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); | 819 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
| 823 | percpu_counter_destroy(&sbi->s_extent_cache_cnt); | ||
| 824 | brelse(sbi->s_sbh); | 820 | brelse(sbi->s_sbh); |
| 825 | #ifdef CONFIG_QUOTA | 821 | #ifdef CONFIG_QUOTA |
| 826 | for (i = 0; i < MAXQUOTAS; i++) | 822 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
| 827 | kfree(sbi->s_qf_names[i]); | 823 | kfree(sbi->s_qf_names[i]); |
| 828 | #endif | 824 | #endif |
| 829 | 825 | ||
| @@ -885,6 +881,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 885 | ext4_es_init_tree(&ei->i_es_tree); | 881 | ext4_es_init_tree(&ei->i_es_tree); |
| 886 | rwlock_init(&ei->i_es_lock); | 882 | rwlock_init(&ei->i_es_lock); |
| 887 | INIT_LIST_HEAD(&ei->i_es_lru); | 883 | INIT_LIST_HEAD(&ei->i_es_lru); |
| 884 | ei->i_es_all_nr = 0; | ||
| 888 | ei->i_es_lru_nr = 0; | 885 | ei->i_es_lru_nr = 0; |
| 889 | ei->i_touch_when = 0; | 886 | ei->i_touch_when = 0; |
| 890 | ei->i_reserved_data_blocks = 0; | 887 | ei->i_reserved_data_blocks = 0; |
| @@ -1002,7 +999,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, | |||
| 1002 | * Currently we don't know the generation for parent directory, so | 999 | * Currently we don't know the generation for parent directory, so |
| 1003 | * a generation of 0 means "accept any" | 1000 | * a generation of 0 means "accept any" |
| 1004 | */ | 1001 | */ |
| 1005 | inode = ext4_iget(sb, ino); | 1002 | inode = ext4_iget_normal(sb, ino); |
| 1006 | if (IS_ERR(inode)) | 1003 | if (IS_ERR(inode)) |
| 1007 | return ERR_CAST(inode); | 1004 | return ERR_CAST(inode); |
| 1008 | if (generation && inode->i_generation != generation) { | 1005 | if (generation && inode->i_generation != generation) { |
| @@ -1124,25 +1121,6 @@ static const struct super_operations ext4_sops = { | |||
| 1124 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1121 | .bdev_try_to_free_page = bdev_try_to_free_page, |
| 1125 | }; | 1122 | }; |
| 1126 | 1123 | ||
| 1127 | static const struct super_operations ext4_nojournal_sops = { | ||
| 1128 | .alloc_inode = ext4_alloc_inode, | ||
| 1129 | .destroy_inode = ext4_destroy_inode, | ||
| 1130 | .write_inode = ext4_write_inode, | ||
| 1131 | .dirty_inode = ext4_dirty_inode, | ||
| 1132 | .drop_inode = ext4_drop_inode, | ||
| 1133 | .evict_inode = ext4_evict_inode, | ||
| 1134 | .sync_fs = ext4_sync_fs_nojournal, | ||
| 1135 | .put_super = ext4_put_super, | ||
| 1136 | .statfs = ext4_statfs, | ||
| 1137 | .remount_fs = ext4_remount, | ||
| 1138 | .show_options = ext4_show_options, | ||
| 1139 | #ifdef CONFIG_QUOTA | ||
| 1140 | .quota_read = ext4_quota_read, | ||
| 1141 | .quota_write = ext4_quota_write, | ||
| 1142 | #endif | ||
| 1143 | .bdev_try_to_free_page = bdev_try_to_free_page, | ||
| 1144 | }; | ||
| 1145 | |||
| 1146 | static const struct export_operations ext4_export_ops = { | 1124 | static const struct export_operations ext4_export_ops = { |
| 1147 | .fh_to_dentry = ext4_fh_to_dentry, | 1125 | .fh_to_dentry = ext4_fh_to_dentry, |
| 1148 | .fh_to_parent = ext4_fh_to_parent, | 1126 | .fh_to_parent = ext4_fh_to_parent, |
| @@ -1712,13 +1690,6 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1712 | "not specified"); | 1690 | "not specified"); |
| 1713 | return 0; | 1691 | return 0; |
| 1714 | } | 1692 | } |
| 1715 | } else { | ||
| 1716 | if (sbi->s_jquota_fmt) { | ||
| 1717 | ext4_msg(sb, KERN_ERR, "journaled quota format " | ||
| 1718 | "specified with no journaling " | ||
| 1719 | "enabled"); | ||
| 1720 | return 0; | ||
| 1721 | } | ||
| 1722 | } | 1693 | } |
| 1723 | #endif | 1694 | #endif |
| 1724 | if (test_opt(sb, DIOREAD_NOLOCK)) { | 1695 | if (test_opt(sb, DIOREAD_NOLOCK)) { |
| @@ -2016,8 +1987,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | |||
| 2016 | __u16 crc = 0; | 1987 | __u16 crc = 0; |
| 2017 | __le32 le_group = cpu_to_le32(block_group); | 1988 | __le32 le_group = cpu_to_le32(block_group); |
| 2018 | 1989 | ||
| 2019 | if ((sbi->s_es->s_feature_ro_compat & | 1990 | if (ext4_has_metadata_csum(sbi->s_sb)) { |
| 2020 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { | ||
| 2021 | /* Use new metadata_csum algorithm */ | 1991 | /* Use new metadata_csum algorithm */ |
| 2022 | __le16 save_csum; | 1992 | __le16 save_csum; |
| 2023 | __u32 csum32; | 1993 | __u32 csum32; |
| @@ -2035,6 +2005,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | |||
| 2035 | } | 2005 | } |
| 2036 | 2006 | ||
| 2037 | /* old crc16 code */ | 2007 | /* old crc16 code */ |
| 2008 | if (!(sbi->s_es->s_feature_ro_compat & | ||
| 2009 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) | ||
| 2010 | return 0; | ||
| 2011 | |||
| 2038 | offset = offsetof(struct ext4_group_desc, bg_checksum); | 2012 | offset = offsetof(struct ext4_group_desc, bg_checksum); |
| 2039 | 2013 | ||
| 2040 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); | 2014 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); |
| @@ -2191,7 +2165,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
| 2191 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2165 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
| 2192 | /* don't clear list on RO mount w/ errors */ | 2166 | /* don't clear list on RO mount w/ errors */ |
| 2193 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { | 2167 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { |
| 2194 | jbd_debug(1, "Errors on filesystem, " | 2168 | ext4_msg(sb, KERN_INFO, "Errors on filesystem, " |
| 2195 | "clearing orphan list.\n"); | 2169 | "clearing orphan list.\n"); |
| 2196 | es->s_last_orphan = 0; | 2170 | es->s_last_orphan = 0; |
| 2197 | } | 2171 | } |
| @@ -2207,7 +2181,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
| 2207 | /* Needed for iput() to work correctly and not trash data */ | 2181 | /* Needed for iput() to work correctly and not trash data */ |
| 2208 | sb->s_flags |= MS_ACTIVE; | 2182 | sb->s_flags |= MS_ACTIVE; |
| 2209 | /* Turn on quotas so that they are updated correctly */ | 2183 | /* Turn on quotas so that they are updated correctly */ |
| 2210 | for (i = 0; i < MAXQUOTAS; i++) { | 2184 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
| 2211 | if (EXT4_SB(sb)->s_qf_names[i]) { | 2185 | if (EXT4_SB(sb)->s_qf_names[i]) { |
| 2212 | int ret = ext4_quota_on_mount(sb, i); | 2186 | int ret = ext4_quota_on_mount(sb, i); |
| 2213 | if (ret < 0) | 2187 | if (ret < 0) |
| @@ -2263,7 +2237,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
| 2263 | PLURAL(nr_truncates)); | 2237 | PLURAL(nr_truncates)); |
| 2264 | #ifdef CONFIG_QUOTA | 2238 | #ifdef CONFIG_QUOTA |
| 2265 | /* Turn quotas off */ | 2239 | /* Turn quotas off */ |
| 2266 | for (i = 0; i < MAXQUOTAS; i++) { | 2240 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
| 2267 | if (sb_dqopt(sb)->files[i]) | 2241 | if (sb_dqopt(sb)->files[i]) |
| 2268 | dquot_quota_off(sb, i); | 2242 | dquot_quota_off(sb, i); |
| 2269 | } | 2243 | } |
| @@ -2548,6 +2522,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, | |||
| 2548 | return count; | 2522 | return count; |
| 2549 | } | 2523 | } |
| 2550 | 2524 | ||
| 2525 | static ssize_t es_ui_show(struct ext4_attr *a, | ||
| 2526 | struct ext4_sb_info *sbi, char *buf) | ||
| 2527 | { | ||
| 2528 | |||
| 2529 | unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + | ||
| 2530 | a->u.offset); | ||
| 2531 | |||
| 2532 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | ||
| 2533 | } | ||
| 2534 | |||
| 2551 | static ssize_t reserved_clusters_show(struct ext4_attr *a, | 2535 | static ssize_t reserved_clusters_show(struct ext4_attr *a, |
| 2552 | struct ext4_sb_info *sbi, char *buf) | 2536 | struct ext4_sb_info *sbi, char *buf) |
| 2553 | { | 2537 | { |
| @@ -2601,14 +2585,29 @@ static struct ext4_attr ext4_attr_##_name = { \ | |||
| 2601 | .offset = offsetof(struct ext4_sb_info, _elname),\ | 2585 | .offset = offsetof(struct ext4_sb_info, _elname),\ |
| 2602 | }, \ | 2586 | }, \ |
| 2603 | } | 2587 | } |
| 2588 | |||
| 2589 | #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ | ||
| 2590 | static struct ext4_attr ext4_attr_##_name = { \ | ||
| 2591 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | ||
| 2592 | .show = _show, \ | ||
| 2593 | .store = _store, \ | ||
| 2594 | .u = { \ | ||
| 2595 | .offset = offsetof(struct ext4_super_block, _elname), \ | ||
| 2596 | }, \ | ||
| 2597 | } | ||
| 2598 | |||
| 2604 | #define EXT4_ATTR(name, mode, show, store) \ | 2599 | #define EXT4_ATTR(name, mode, show, store) \ |
| 2605 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2600 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
| 2606 | 2601 | ||
| 2607 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) | 2602 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) |
| 2608 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) | 2603 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) |
| 2609 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) | 2604 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) |
| 2605 | |||
| 2606 | #define EXT4_RO_ATTR_ES_UI(name, elname) \ | ||
| 2607 | EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) | ||
| 2610 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2608 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
| 2611 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) | 2609 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) |
| 2610 | |||
| 2612 | #define ATTR_LIST(name) &ext4_attr_##name.attr | 2611 | #define ATTR_LIST(name) &ext4_attr_##name.attr |
| 2613 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ | 2612 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ |
| 2614 | static struct ext4_attr ext4_attr_##_name = { \ | 2613 | static struct ext4_attr ext4_attr_##_name = { \ |
| @@ -2641,6 +2640,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int | |||
| 2641 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); | 2640 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); |
| 2642 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); | 2641 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); |
| 2643 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); | 2642 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); |
| 2643 | EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); | ||
| 2644 | EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); | ||
| 2645 | EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); | ||
| 2644 | 2646 | ||
| 2645 | static struct attribute *ext4_attrs[] = { | 2647 | static struct attribute *ext4_attrs[] = { |
| 2646 | ATTR_LIST(delayed_allocation_blocks), | 2648 | ATTR_LIST(delayed_allocation_blocks), |
| @@ -2664,6 +2666,9 @@ static struct attribute *ext4_attrs[] = { | |||
| 2664 | ATTR_LIST(warning_ratelimit_burst), | 2666 | ATTR_LIST(warning_ratelimit_burst), |
| 2665 | ATTR_LIST(msg_ratelimit_interval_ms), | 2667 | ATTR_LIST(msg_ratelimit_interval_ms), |
| 2666 | ATTR_LIST(msg_ratelimit_burst), | 2668 | ATTR_LIST(msg_ratelimit_burst), |
| 2669 | ATTR_LIST(errors_count), | ||
| 2670 | ATTR_LIST(first_error_time), | ||
| 2671 | ATTR_LIST(last_error_time), | ||
| 2667 | NULL, | 2672 | NULL, |
| 2668 | }; | 2673 | }; |
| 2669 | 2674 | ||
| @@ -2723,9 +2728,25 @@ static void ext4_feat_release(struct kobject *kobj) | |||
| 2723 | complete(&ext4_feat->f_kobj_unregister); | 2728 | complete(&ext4_feat->f_kobj_unregister); |
| 2724 | } | 2729 | } |
| 2725 | 2730 | ||
| 2731 | static ssize_t ext4_feat_show(struct kobject *kobj, | ||
| 2732 | struct attribute *attr, char *buf) | ||
| 2733 | { | ||
| 2734 | return snprintf(buf, PAGE_SIZE, "supported\n"); | ||
| 2735 | } | ||
| 2736 | |||
| 2737 | /* | ||
| 2738 | * We can not use ext4_attr_show/store because it relies on the kobject | ||
| 2739 | * being embedded in the ext4_sb_info structure which is definitely not | ||
| 2740 | * true in this case. | ||
| 2741 | */ | ||
| 2742 | static const struct sysfs_ops ext4_feat_ops = { | ||
| 2743 | .show = ext4_feat_show, | ||
| 2744 | .store = NULL, | ||
| 2745 | }; | ||
| 2746 | |||
| 2726 | static struct kobj_type ext4_feat_ktype = { | 2747 | static struct kobj_type ext4_feat_ktype = { |
| 2727 | .default_attrs = ext4_feat_attrs, | 2748 | .default_attrs = ext4_feat_attrs, |
| 2728 | .sysfs_ops = &ext4_attr_ops, | 2749 | .sysfs_ops = &ext4_feat_ops, |
| 2729 | .release = ext4_feat_release, | 2750 | .release = ext4_feat_release, |
| 2730 | }; | 2751 | }; |
| 2731 | 2752 | ||
| @@ -3179,8 +3200,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
| 3179 | int compat, incompat; | 3200 | int compat, incompat; |
| 3180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3201 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 3181 | 3202 | ||
| 3182 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3203 | if (ext4_has_metadata_csum(sb)) { |
| 3183 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
| 3184 | /* journal checksum v3 */ | 3204 | /* journal checksum v3 */ |
| 3185 | compat = 0; | 3205 | compat = 0; |
| 3186 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; | 3206 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; |
| @@ -3190,6 +3210,10 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
| 3190 | incompat = 0; | 3210 | incompat = 0; |
| 3191 | } | 3211 | } |
| 3192 | 3212 | ||
| 3213 | jbd2_journal_clear_features(sbi->s_journal, | ||
| 3214 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
| 3215 | JBD2_FEATURE_INCOMPAT_CSUM_V3 | | ||
| 3216 | JBD2_FEATURE_INCOMPAT_CSUM_V2); | ||
| 3193 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 3217 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { |
| 3194 | ret = jbd2_journal_set_features(sbi->s_journal, | 3218 | ret = jbd2_journal_set_features(sbi->s_journal, |
| 3195 | compat, 0, | 3219 | compat, 0, |
| @@ -3202,11 +3226,8 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
| 3202 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 3226 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
| 3203 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 3227 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
| 3204 | } else { | 3228 | } else { |
| 3205 | jbd2_journal_clear_features(sbi->s_journal, | 3229 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
| 3206 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 3230 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
| 3207 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | | ||
| 3208 | JBD2_FEATURE_INCOMPAT_CSUM_V3 | | ||
| 3209 | JBD2_FEATURE_INCOMPAT_CSUM_V2); | ||
| 3210 | } | 3231 | } |
| 3211 | 3232 | ||
| 3212 | return ret; | 3233 | return ret; |
| @@ -3436,7 +3457,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3436 | logical_sb_block = sb_block; | 3457 | logical_sb_block = sb_block; |
| 3437 | } | 3458 | } |
| 3438 | 3459 | ||
| 3439 | if (!(bh = sb_bread(sb, logical_sb_block))) { | 3460 | if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { |
| 3440 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); | 3461 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); |
| 3441 | goto out_fail; | 3462 | goto out_fail; |
| 3442 | } | 3463 | } |
| @@ -3487,8 +3508,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3487 | } | 3508 | } |
| 3488 | 3509 | ||
| 3489 | /* Precompute checksum seed for all metadata */ | 3510 | /* Precompute checksum seed for all metadata */ |
| 3490 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3511 | if (ext4_has_metadata_csum(sb)) |
| 3491 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 3492 | sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, | 3512 | sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, |
| 3493 | sizeof(es->s_uuid)); | 3513 | sizeof(es->s_uuid)); |
| 3494 | 3514 | ||
| @@ -3506,6 +3526,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3506 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 3526 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 3507 | set_opt(sb, POSIX_ACL); | 3527 | set_opt(sb, POSIX_ACL); |
| 3508 | #endif | 3528 | #endif |
| 3529 | /* don't forget to enable journal_csum when metadata_csum is enabled. */ | ||
| 3530 | if (ext4_has_metadata_csum(sb)) | ||
| 3531 | set_opt(sb, JOURNAL_CHECKSUM); | ||
| 3532 | |||
| 3509 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 3533 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
| 3510 | set_opt(sb, JOURNAL_DATA); | 3534 | set_opt(sb, JOURNAL_DATA); |
| 3511 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 3535 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
| @@ -3519,8 +3543,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3519 | set_opt(sb, ERRORS_CONT); | 3543 | set_opt(sb, ERRORS_CONT); |
| 3520 | else | 3544 | else |
| 3521 | set_opt(sb, ERRORS_RO); | 3545 | set_opt(sb, ERRORS_RO); |
| 3522 | if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) | 3546 | /* block_validity enabled by default; disable with noblock_validity */ |
| 3523 | set_opt(sb, BLOCK_VALIDITY); | 3547 | set_opt(sb, BLOCK_VALIDITY); |
| 3524 | if (def_mount_opts & EXT4_DEFM_DISCARD) | 3548 | if (def_mount_opts & EXT4_DEFM_DISCARD) |
| 3525 | set_opt(sb, DISCARD); | 3549 | set_opt(sb, DISCARD); |
| 3526 | 3550 | ||
| @@ -3646,7 +3670,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3646 | brelse(bh); | 3670 | brelse(bh); |
| 3647 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; | 3671 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; |
| 3648 | offset = do_div(logical_sb_block, blocksize); | 3672 | offset = do_div(logical_sb_block, blocksize); |
| 3649 | bh = sb_bread(sb, logical_sb_block); | 3673 | bh = sb_bread_unmovable(sb, logical_sb_block); |
| 3650 | if (!bh) { | 3674 | if (!bh) { |
| 3651 | ext4_msg(sb, KERN_ERR, | 3675 | ext4_msg(sb, KERN_ERR, |
| 3652 | "Can't read superblock on 2nd try"); | 3676 | "Can't read superblock on 2nd try"); |
| @@ -3868,7 +3892,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3868 | 3892 | ||
| 3869 | for (i = 0; i < db_count; i++) { | 3893 | for (i = 0; i < db_count; i++) { |
| 3870 | block = descriptor_loc(sb, logical_sb_block, i); | 3894 | block = descriptor_loc(sb, logical_sb_block, i); |
| 3871 | sbi->s_group_desc[i] = sb_bread(sb, block); | 3895 | sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); |
| 3872 | if (!sbi->s_group_desc[i]) { | 3896 | if (!sbi->s_group_desc[i]) { |
| 3873 | ext4_msg(sb, KERN_ERR, | 3897 | ext4_msg(sb, KERN_ERR, |
| 3874 | "can't read group descriptor %d", i); | 3898 | "can't read group descriptor %d", i); |
| @@ -3890,13 +3914,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3890 | sbi->s_err_report.data = (unsigned long) sb; | 3914 | sbi->s_err_report.data = (unsigned long) sb; |
| 3891 | 3915 | ||
| 3892 | /* Register extent status tree shrinker */ | 3916 | /* Register extent status tree shrinker */ |
| 3893 | ext4_es_register_shrinker(sbi); | 3917 | if (ext4_es_register_shrinker(sbi)) |
| 3894 | |||
| 3895 | err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); | ||
| 3896 | if (err) { | ||
| 3897 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
| 3898 | goto failed_mount3; | 3918 | goto failed_mount3; |
| 3899 | } | ||
| 3900 | 3919 | ||
| 3901 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3920 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
| 3902 | sbi->s_extent_max_zeroout_kb = 32; | 3921 | sbi->s_extent_max_zeroout_kb = 32; |
| @@ -3904,11 +3923,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3904 | /* | 3923 | /* |
| 3905 | * set up enough so that it can read an inode | 3924 | * set up enough so that it can read an inode |
| 3906 | */ | 3925 | */ |
| 3907 | if (!test_opt(sb, NOLOAD) && | 3926 | sb->s_op = &ext4_sops; |
| 3908 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) | ||
| 3909 | sb->s_op = &ext4_sops; | ||
| 3910 | else | ||
| 3911 | sb->s_op = &ext4_nojournal_sops; | ||
| 3912 | sb->s_export_op = &ext4_export_ops; | 3927 | sb->s_export_op = &ext4_export_ops; |
| 3913 | sb->s_xattr = ext4_xattr_handlers; | 3928 | sb->s_xattr = ext4_xattr_handlers; |
| 3914 | #ifdef CONFIG_QUOTA | 3929 | #ifdef CONFIG_QUOTA |
| @@ -3932,7 +3947,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3932 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && | 3947 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && |
| 3933 | !(sb->s_flags & MS_RDONLY)) | 3948 | !(sb->s_flags & MS_RDONLY)) |
| 3934 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) | 3949 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) |
| 3935 | goto failed_mount3; | 3950 | goto failed_mount3a; |
| 3936 | 3951 | ||
| 3937 | /* | 3952 | /* |
| 3938 | * The first inode we look at is the journal inode. Don't try | 3953 | * The first inode we look at is the journal inode. Don't try |
| @@ -3941,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3941 | if (!test_opt(sb, NOLOAD) && | 3956 | if (!test_opt(sb, NOLOAD) && |
| 3942 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { | 3957 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { |
| 3943 | if (ext4_load_journal(sb, es, journal_devnum)) | 3958 | if (ext4_load_journal(sb, es, journal_devnum)) |
| 3944 | goto failed_mount3; | 3959 | goto failed_mount3a; |
| 3945 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && | 3960 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && |
| 3946 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 3961 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
| 3947 | ext4_msg(sb, KERN_ERR, "required journal recovery " | 3962 | ext4_msg(sb, KERN_ERR, "required journal recovery " |
| @@ -4229,10 +4244,10 @@ failed_mount_wq: | |||
| 4229 | jbd2_journal_destroy(sbi->s_journal); | 4244 | jbd2_journal_destroy(sbi->s_journal); |
| 4230 | sbi->s_journal = NULL; | 4245 | sbi->s_journal = NULL; |
| 4231 | } | 4246 | } |
| 4232 | failed_mount3: | 4247 | failed_mount3a: |
| 4233 | ext4_es_unregister_shrinker(sbi); | 4248 | ext4_es_unregister_shrinker(sbi); |
| 4249 | failed_mount3: | ||
| 4234 | del_timer_sync(&sbi->s_err_report); | 4250 | del_timer_sync(&sbi->s_err_report); |
| 4235 | percpu_counter_destroy(&sbi->s_extent_cache_cnt); | ||
| 4236 | if (sbi->s_mmp_tsk) | 4251 | if (sbi->s_mmp_tsk) |
| 4237 | kthread_stop(sbi->s_mmp_tsk); | 4252 | kthread_stop(sbi->s_mmp_tsk); |
| 4238 | failed_mount2: | 4253 | failed_mount2: |
| @@ -4247,7 +4262,7 @@ failed_mount: | |||
| 4247 | remove_proc_entry(sb->s_id, ext4_proc_root); | 4262 | remove_proc_entry(sb->s_id, ext4_proc_root); |
| 4248 | } | 4263 | } |
| 4249 | #ifdef CONFIG_QUOTA | 4264 | #ifdef CONFIG_QUOTA |
| 4250 | for (i = 0; i < MAXQUOTAS; i++) | 4265 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
| 4251 | kfree(sbi->s_qf_names[i]); | 4266 | kfree(sbi->s_qf_names[i]); |
| 4252 | #endif | 4267 | #endif |
| 4253 | ext4_blkdev_remove(sbi); | 4268 | ext4_blkdev_remove(sbi); |
| @@ -4375,6 +4390,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
| 4375 | goto out_bdev; | 4390 | goto out_bdev; |
| 4376 | } | 4391 | } |
| 4377 | 4392 | ||
| 4393 | if ((le32_to_cpu(es->s_feature_ro_compat) & | ||
| 4394 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
| 4395 | es->s_checksum != ext4_superblock_csum(sb, es)) { | ||
| 4396 | ext4_msg(sb, KERN_ERR, "external journal has " | ||
| 4397 | "corrupt superblock"); | ||
| 4398 | brelse(bh); | ||
| 4399 | goto out_bdev; | ||
| 4400 | } | ||
| 4401 | |||
| 4378 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { | 4402 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { |
| 4379 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); | 4403 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); |
| 4380 | brelse(bh); | 4404 | brelse(bh); |
| @@ -4677,15 +4701,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
| 4677 | * being sent at the end of the function. But we can skip it if | 4701 | * being sent at the end of the function. But we can skip it if |
| 4678 | * transaction_commit will do it for us. | 4702 | * transaction_commit will do it for us. |
| 4679 | */ | 4703 | */ |
| 4680 | target = jbd2_get_latest_transaction(sbi->s_journal); | 4704 | if (sbi->s_journal) { |
| 4681 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && | 4705 | target = jbd2_get_latest_transaction(sbi->s_journal); |
| 4682 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | 4706 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && |
| 4707 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | ||
| 4708 | needs_barrier = true; | ||
| 4709 | |||
| 4710 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | ||
| 4711 | if (wait) | ||
| 4712 | ret = jbd2_log_wait_commit(sbi->s_journal, | ||
| 4713 | target); | ||
| 4714 | } | ||
| 4715 | } else if (wait && test_opt(sb, BARRIER)) | ||
| 4683 | needs_barrier = true; | 4716 | needs_barrier = true; |
| 4684 | |||
| 4685 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | ||
| 4686 | if (wait) | ||
| 4687 | ret = jbd2_log_wait_commit(sbi->s_journal, target); | ||
| 4688 | } | ||
| 4689 | if (needs_barrier) { | 4717 | if (needs_barrier) { |
| 4690 | int err; | 4718 | int err; |
| 4691 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | 4719 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); |
| @@ -4696,19 +4724,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
| 4696 | return ret; | 4724 | return ret; |
| 4697 | } | 4725 | } |
| 4698 | 4726 | ||
| 4699 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) | ||
| 4700 | { | ||
| 4701 | int ret = 0; | ||
| 4702 | |||
| 4703 | trace_ext4_sync_fs(sb, wait); | ||
| 4704 | flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
| 4705 | dquot_writeback_dquots(sb, -1); | ||
| 4706 | if (wait && test_opt(sb, BARRIER)) | ||
| 4707 | ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
| 4708 | |||
| 4709 | return ret; | ||
| 4710 | } | ||
| 4711 | |||
| 4712 | /* | 4727 | /* |
| 4713 | * LVM calls this function before a (read-only) snapshot is created. This | 4728 | * LVM calls this function before a (read-only) snapshot is created. This |
| 4714 | * gives us a chance to flush the journal completely and mark the fs clean. | 4729 | * gives us a chance to flush the journal completely and mark the fs clean. |
| @@ -4727,23 +4742,26 @@ static int ext4_freeze(struct super_block *sb) | |||
| 4727 | 4742 | ||
| 4728 | journal = EXT4_SB(sb)->s_journal; | 4743 | journal = EXT4_SB(sb)->s_journal; |
| 4729 | 4744 | ||
| 4730 | /* Now we set up the journal barrier. */ | 4745 | if (journal) { |
| 4731 | jbd2_journal_lock_updates(journal); | 4746 | /* Now we set up the journal barrier. */ |
| 4747 | jbd2_journal_lock_updates(journal); | ||
| 4732 | 4748 | ||
| 4733 | /* | 4749 | /* |
| 4734 | * Don't clear the needs_recovery flag if we failed to flush | 4750 | * Don't clear the needs_recovery flag if we failed to |
| 4735 | * the journal. | 4751 | * flush the journal. |
| 4736 | */ | 4752 | */ |
| 4737 | error = jbd2_journal_flush(journal); | 4753 | error = jbd2_journal_flush(journal); |
| 4738 | if (error < 0) | 4754 | if (error < 0) |
| 4739 | goto out; | 4755 | goto out; |
| 4756 | } | ||
| 4740 | 4757 | ||
| 4741 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 4758 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
| 4742 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4759 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
| 4743 | error = ext4_commit_super(sb, 1); | 4760 | error = ext4_commit_super(sb, 1); |
| 4744 | out: | 4761 | out: |
| 4745 | /* we rely on upper layer to stop further updates */ | 4762 | if (journal) |
| 4746 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 4763 | /* we rely on upper layer to stop further updates */ |
| 4764 | jbd2_journal_unlock_updates(journal); | ||
| 4747 | return error; | 4765 | return error; |
| 4748 | } | 4766 | } |
| 4749 | 4767 | ||
| @@ -4774,7 +4792,7 @@ struct ext4_mount_options { | |||
| 4774 | u32 s_min_batch_time, s_max_batch_time; | 4792 | u32 s_min_batch_time, s_max_batch_time; |
| 4775 | #ifdef CONFIG_QUOTA | 4793 | #ifdef CONFIG_QUOTA |
| 4776 | int s_jquota_fmt; | 4794 | int s_jquota_fmt; |
| 4777 | char *s_qf_names[MAXQUOTAS]; | 4795 | char *s_qf_names[EXT4_MAXQUOTAS]; |
| 4778 | #endif | 4796 | #endif |
| 4779 | }; | 4797 | }; |
| 4780 | 4798 | ||
| @@ -4804,7 +4822,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
| 4804 | old_opts.s_max_batch_time = sbi->s_max_batch_time; | 4822 | old_opts.s_max_batch_time = sbi->s_max_batch_time; |
| 4805 | #ifdef CONFIG_QUOTA | 4823 | #ifdef CONFIG_QUOTA |
| 4806 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; | 4824 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; |
| 4807 | for (i = 0; i < MAXQUOTAS; i++) | 4825 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
| 4808 | if (sbi->s_qf_names[i]) { | 4826 | if (sbi->s_qf_names[i]) { |
| 4809 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], | 4827 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], |
| 4810 | GFP_KERNEL); | 4828 | GFP_KERNEL); |
| @@ -4828,6 +4846,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
| 4828 | goto restore_opts; | 4846 | goto restore_opts; |
| 4829 | } | 4847 | } |
| 4830 | 4848 | ||
| 4849 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ | ||
| 4850 | test_opt(sb, JOURNAL_CHECKSUM)) { | ||
| 4851 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " | ||
| 4852 | "during remount not supported"); | ||
| 4853 | err = -EINVAL; | ||
| 4854 | goto restore_opts; | ||
| 4855 | } | ||
| 4856 | |||
| 4831 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 4857 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
| 4832 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | 4858 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { |
| 4833 | ext4_msg(sb, KERN_ERR, "can't mount with " | 4859 | ext4_msg(sb, KERN_ERR, "can't mount with " |
| @@ -4965,7 +4991,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
| 4965 | 4991 | ||
| 4966 | #ifdef CONFIG_QUOTA | 4992 | #ifdef CONFIG_QUOTA |
| 4967 | /* Release old quota file names */ | 4993 | /* Release old quota file names */ |
| 4968 | for (i = 0; i < MAXQUOTAS; i++) | 4994 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
| 4969 | kfree(old_opts.s_qf_names[i]); | 4995 | kfree(old_opts.s_qf_names[i]); |
| 4970 | if (enable_quota) { | 4996 | if (enable_quota) { |
| 4971 | if (sb_any_quota_suspended(sb)) | 4997 | if (sb_any_quota_suspended(sb)) |
| @@ -4994,7 +5020,7 @@ restore_opts: | |||
| 4994 | sbi->s_max_batch_time = old_opts.s_max_batch_time; | 5020 | sbi->s_max_batch_time = old_opts.s_max_batch_time; |
| 4995 | #ifdef CONFIG_QUOTA | 5021 | #ifdef CONFIG_QUOTA |
| 4996 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; | 5022 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; |
| 4997 | for (i = 0; i < MAXQUOTAS; i++) { | 5023 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
| 4998 | kfree(sbi->s_qf_names[i]); | 5024 | kfree(sbi->s_qf_names[i]); |
| 4999 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 5025 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
| 5000 | } | 5026 | } |
| @@ -5197,7 +5223,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, | |||
| 5197 | { | 5223 | { |
| 5198 | int err; | 5224 | int err; |
| 5199 | struct inode *qf_inode; | 5225 | struct inode *qf_inode; |
| 5200 | unsigned long qf_inums[MAXQUOTAS] = { | 5226 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
| 5201 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5227 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
| 5202 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5228 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) |
| 5203 | }; | 5229 | }; |
| @@ -5225,13 +5251,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, | |||
| 5225 | static int ext4_enable_quotas(struct super_block *sb) | 5251 | static int ext4_enable_quotas(struct super_block *sb) |
| 5226 | { | 5252 | { |
| 5227 | int type, err = 0; | 5253 | int type, err = 0; |
| 5228 | unsigned long qf_inums[MAXQUOTAS] = { | 5254 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
| 5229 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5255 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
| 5230 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5256 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) |
| 5231 | }; | 5257 | }; |
| 5232 | 5258 | ||
| 5233 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; | 5259 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; |
| 5234 | for (type = 0; type < MAXQUOTAS; type++) { | 5260 | for (type = 0; type < EXT4_MAXQUOTAS; type++) { |
| 5235 | if (qf_inums[type]) { | 5261 | if (qf_inums[type]) { |
| 5236 | err = ext4_quota_enable(sb, type, QFMT_VFS_V1, | 5262 | err = ext4_quota_enable(sb, type, QFMT_VFS_V1, |
| 5237 | DQUOT_USAGE_ENABLED); | 5263 | DQUOT_USAGE_ENABLED); |
| @@ -5309,7 +5335,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | |||
| 5309 | { | 5335 | { |
| 5310 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5336 | struct inode *inode = sb_dqopt(sb)->files[type]; |
| 5311 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 5337 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
| 5312 | int err = 0; | ||
| 5313 | int offset = off & (sb->s_blocksize - 1); | 5338 | int offset = off & (sb->s_blocksize - 1); |
| 5314 | int tocopy; | 5339 | int tocopy; |
| 5315 | size_t toread; | 5340 | size_t toread; |
| @@ -5324,9 +5349,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | |||
| 5324 | while (toread > 0) { | 5349 | while (toread > 0) { |
| 5325 | tocopy = sb->s_blocksize - offset < toread ? | 5350 | tocopy = sb->s_blocksize - offset < toread ? |
| 5326 | sb->s_blocksize - offset : toread; | 5351 | sb->s_blocksize - offset : toread; |
| 5327 | bh = ext4_bread(NULL, inode, blk, 0, &err); | 5352 | bh = ext4_bread(NULL, inode, blk, 0); |
| 5328 | if (err) | 5353 | if (IS_ERR(bh)) |
| 5329 | return err; | 5354 | return PTR_ERR(bh); |
| 5330 | if (!bh) /* A hole? */ | 5355 | if (!bh) /* A hole? */ |
| 5331 | memset(data, 0, tocopy); | 5356 | memset(data, 0, tocopy); |
| 5332 | else | 5357 | else |
| @@ -5347,8 +5372,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
| 5347 | { | 5372 | { |
| 5348 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5373 | struct inode *inode = sb_dqopt(sb)->files[type]; |
| 5349 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 5374 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
| 5350 | int err = 0; | 5375 | int err, offset = off & (sb->s_blocksize - 1); |
| 5351 | int offset = off & (sb->s_blocksize - 1); | ||
| 5352 | struct buffer_head *bh; | 5376 | struct buffer_head *bh; |
| 5353 | handle_t *handle = journal_current_handle(); | 5377 | handle_t *handle = journal_current_handle(); |
| 5354 | 5378 | ||
| @@ -5369,14 +5393,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
| 5369 | return -EIO; | 5393 | return -EIO; |
| 5370 | } | 5394 | } |
| 5371 | 5395 | ||
| 5372 | bh = ext4_bread(handle, inode, blk, 1, &err); | 5396 | bh = ext4_bread(handle, inode, blk, 1); |
| 5397 | if (IS_ERR(bh)) | ||
| 5398 | return PTR_ERR(bh); | ||
| 5373 | if (!bh) | 5399 | if (!bh) |
| 5374 | goto out; | 5400 | goto out; |
| 5375 | BUFFER_TRACE(bh, "get write access"); | 5401 | BUFFER_TRACE(bh, "get write access"); |
| 5376 | err = ext4_journal_get_write_access(handle, bh); | 5402 | err = ext4_journal_get_write_access(handle, bh); |
| 5377 | if (err) { | 5403 | if (err) { |
| 5378 | brelse(bh); | 5404 | brelse(bh); |
| 5379 | goto out; | 5405 | return err; |
| 5380 | } | 5406 | } |
| 5381 | lock_buffer(bh); | 5407 | lock_buffer(bh); |
| 5382 | memcpy(bh->b_data+offset, data, len); | 5408 | memcpy(bh->b_data+offset, data, len); |
| @@ -5385,8 +5411,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
| 5385 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | 5411 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
| 5386 | brelse(bh); | 5412 | brelse(bh); |
| 5387 | out: | 5413 | out: |
| 5388 | if (err) | ||
| 5389 | return err; | ||
| 5390 | if (inode->i_size < off + len) { | 5414 | if (inode->i_size < off + len) { |
| 5391 | i_size_write(inode, off + len); | 5415 | i_size_write(inode, off + len); |
| 5392 | EXT4_I(inode)->i_disksize = inode->i_size; | 5416 | EXT4_I(inode)->i_disksize = inode->i_size; |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e7387337060c..1e09fc77395c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -142,8 +142,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode, | |||
| 142 | sector_t block_nr, | 142 | sector_t block_nr, |
| 143 | struct ext4_xattr_header *hdr) | 143 | struct ext4_xattr_header *hdr) |
| 144 | { | 144 | { |
| 145 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 145 | if (ext4_has_metadata_csum(inode->i_sb) && |
| 146 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
| 147 | (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) | 146 | (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) |
| 148 | return 0; | 147 | return 0; |
| 149 | return 1; | 148 | return 1; |
| @@ -153,8 +152,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode, | |||
| 153 | sector_t block_nr, | 152 | sector_t block_nr, |
| 154 | struct ext4_xattr_header *hdr) | 153 | struct ext4_xattr_header *hdr) |
| 155 | { | 154 | { |
| 156 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 155 | if (!ext4_has_metadata_csum(inode->i_sb)) |
| 157 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
| 158 | return; | 156 | return; |
| 159 | 157 | ||
| 160 | hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); | 158 | hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); |
| @@ -190,14 +188,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
| 190 | } | 188 | } |
| 191 | 189 | ||
| 192 | static int | 190 | static int |
| 193 | ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) | 191 | ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, |
| 192 | void *value_start) | ||
| 194 | { | 193 | { |
| 195 | while (!IS_LAST_ENTRY(entry)) { | 194 | struct ext4_xattr_entry *e = entry; |
| 196 | struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); | 195 | |
| 196 | while (!IS_LAST_ENTRY(e)) { | ||
| 197 | struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); | ||
| 197 | if ((void *)next >= end) | 198 | if ((void *)next >= end) |
| 198 | return -EIO; | 199 | return -EIO; |
| 199 | entry = next; | 200 | e = next; |
| 200 | } | 201 | } |
| 202 | |||
| 203 | while (!IS_LAST_ENTRY(entry)) { | ||
| 204 | if (entry->e_value_size != 0 && | ||
| 205 | (value_start + le16_to_cpu(entry->e_value_offs) < | ||
| 206 | (void *)e + sizeof(__u32) || | ||
| 207 | value_start + le16_to_cpu(entry->e_value_offs) + | ||
| 208 | le32_to_cpu(entry->e_value_size) > end)) | ||
| 209 | return -EIO; | ||
| 210 | entry = EXT4_XATTR_NEXT(entry); | ||
| 211 | } | ||
| 212 | |||
| 201 | return 0; | 213 | return 0; |
| 202 | } | 214 | } |
| 203 | 215 | ||
| @@ -214,7 +226,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) | |||
| 214 | return -EIO; | 226 | return -EIO; |
| 215 | if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) | 227 | if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) |
| 216 | return -EIO; | 228 | return -EIO; |
| 217 | error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); | 229 | error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, |
| 230 | bh->b_data); | ||
| 218 | if (!error) | 231 | if (!error) |
| 219 | set_buffer_verified(bh); | 232 | set_buffer_verified(bh); |
| 220 | return error; | 233 | return error; |
| @@ -331,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, | |||
| 331 | header = IHDR(inode, raw_inode); | 344 | header = IHDR(inode, raw_inode); |
| 332 | entry = IFIRST(header); | 345 | entry = IFIRST(header); |
| 333 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; | 346 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; |
| 334 | error = ext4_xattr_check_names(entry, end); | 347 | error = ext4_xattr_check_names(entry, end, entry); |
| 335 | if (error) | 348 | if (error) |
| 336 | goto cleanup; | 349 | goto cleanup; |
| 337 | error = ext4_xattr_find_entry(&entry, name_index, name, | 350 | error = ext4_xattr_find_entry(&entry, name_index, name, |
| @@ -463,7 +476,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
| 463 | raw_inode = ext4_raw_inode(&iloc); | 476 | raw_inode = ext4_raw_inode(&iloc); |
| 464 | header = IHDR(inode, raw_inode); | 477 | header = IHDR(inode, raw_inode); |
| 465 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; | 478 | end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; |
| 466 | error = ext4_xattr_check_names(IFIRST(header), end); | 479 | error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); |
| 467 | if (error) | 480 | if (error) |
| 468 | goto cleanup; | 481 | goto cleanup; |
| 469 | error = ext4_xattr_list_entries(dentry, IFIRST(header), | 482 | error = ext4_xattr_list_entries(dentry, IFIRST(header), |
| @@ -899,14 +912,8 @@ inserted: | |||
| 899 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 912 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
| 900 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 913 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
| 901 | 914 | ||
| 902 | /* | ||
| 903 | * take i_data_sem because we will test | ||
| 904 | * i_delalloc_reserved_flag in ext4_mb_new_blocks | ||
| 905 | */ | ||
| 906 | down_read(&EXT4_I(inode)->i_data_sem); | ||
| 907 | block = ext4_new_meta_blocks(handle, inode, goal, 0, | 915 | block = ext4_new_meta_blocks(handle, inode, goal, 0, |
| 908 | NULL, &error); | 916 | NULL, &error); |
| 909 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
| 910 | if (error) | 917 | if (error) |
| 911 | goto cleanup; | 918 | goto cleanup; |
| 912 | 919 | ||
| @@ -986,7 +993,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, | |||
| 986 | is->s.here = is->s.first; | 993 | is->s.here = is->s.first; |
| 987 | is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; | 994 | is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; |
| 988 | if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { | 995 | if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { |
| 989 | error = ext4_xattr_check_names(IFIRST(header), is->s.end); | 996 | error = ext4_xattr_check_names(IFIRST(header), is->s.end, |
| 997 | IFIRST(header)); | ||
| 990 | if (error) | 998 | if (error) |
| 991 | return error; | 999 | return error; |
| 992 | /* Find the named attribute. */ | 1000 | /* Find the named attribute. */ |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6df8d3d885e5..b8b92c2f9683 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
| @@ -736,7 +736,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 736 | } | 736 | } |
| 737 | 737 | ||
| 738 | alias = d_find_alias(inode); | 738 | alias = d_find_alias(inode); |
| 739 | if (alias && !vfat_d_anon_disconn(alias)) { | 739 | /* |
| 740 | * Checking "alias->d_parent == dentry->d_parent" to make sure | ||
| 741 | * FS is not corrupted (especially double linked dir). | ||
| 742 | */ | ||
| 743 | if (alias && alias->d_parent == dentry->d_parent && | ||
| 744 | !vfat_d_anon_disconn(alias)) { | ||
| 740 | /* | 745 | /* |
| 741 | * This inode has non anonymous-DCACHE_DISCONNECTED | 746 | * This inode has non anonymous-DCACHE_DISCONNECTED |
| 742 | * dentry. This means, the user did ->lookup() by an | 747 | * dentry. This means, the user did ->lookup() by an |
| @@ -755,12 +760,9 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, | |||
| 755 | 760 | ||
| 756 | out: | 761 | out: |
| 757 | mutex_unlock(&MSDOS_SB(sb)->s_lock); | 762 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
| 758 | dentry->d_time = dentry->d_parent->d_inode->i_version; | 763 | if (!inode) |
| 759 | dentry = d_splice_alias(inode, dentry); | 764 | dentry->d_time = dir->i_version; |
| 760 | if (dentry) | 765 | return d_splice_alias(inode, dentry); |
| 761 | dentry->d_time = dentry->d_parent->d_inode->i_version; | ||
| 762 | return dentry; | ||
| 763 | |||
| 764 | error: | 766 | error: |
| 765 | mutex_unlock(&MSDOS_SB(sb)->s_lock); | 767 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
| 766 | return ERR_PTR(err); | 768 | return ERR_PTR(err); |
| @@ -793,7 +795,6 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
| 793 | inode->i_mtime = inode->i_atime = inode->i_ctime = ts; | 795 | inode->i_mtime = inode->i_atime = inode->i_ctime = ts; |
| 794 | /* timestamp is already written, so mark_inode_dirty() is unneeded. */ | 796 | /* timestamp is already written, so mark_inode_dirty() is unneeded. */ |
| 795 | 797 | ||
| 796 | dentry->d_time = dentry->d_parent->d_inode->i_version; | ||
| 797 | d_instantiate(dentry, inode); | 798 | d_instantiate(dentry, inode); |
| 798 | out: | 799 | out: |
| 799 | mutex_unlock(&MSDOS_SB(sb)->s_lock); | 800 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
| @@ -824,6 +825,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 824 | clear_nlink(inode); | 825 | clear_nlink(inode); |
| 825 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 826 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
| 826 | fat_detach(inode); | 827 | fat_detach(inode); |
| 828 | dentry->d_time = dir->i_version; | ||
| 827 | out: | 829 | out: |
| 828 | mutex_unlock(&MSDOS_SB(sb)->s_lock); | 830 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
| 829 | 831 | ||
| @@ -849,6 +851,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) | |||
| 849 | clear_nlink(inode); | 851 | clear_nlink(inode); |
| 850 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; | 852 | inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; |
| 851 | fat_detach(inode); | 853 | fat_detach(inode); |
| 854 | dentry->d_time = dir->i_version; | ||
| 852 | out: | 855 | out: |
| 853 | mutex_unlock(&MSDOS_SB(sb)->s_lock); | 856 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
| 854 | 857 | ||
| @@ -889,7 +892,6 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 889 | inode->i_mtime = inode->i_atime = inode->i_ctime = ts; | 892 | inode->i_mtime = inode->i_atime = inode->i_ctime = ts; |
| 890 | /* timestamp is already written, so mark_inode_dirty() is unneeded. */ | 893 | /* timestamp is already written, so mark_inode_dirty() is unneeded. */ |
| 891 | 894 | ||
| 892 | dentry->d_time = dentry->d_parent->d_inode->i_version; | ||
| 893 | d_instantiate(dentry, inode); | 895 | d_instantiate(dentry, inode); |
| 894 | 896 | ||
| 895 | mutex_unlock(&MSDOS_SB(sb)->s_lock); | 897 | mutex_unlock(&MSDOS_SB(sb)->s_lock); |
diff --git a/fs/internal.h b/fs/internal.h index 9477f8f6aefc..757ba2abf21e 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -47,7 +47,6 @@ extern void __init chrdev_init(void); | |||
| 47 | /* | 47 | /* |
| 48 | * namei.c | 48 | * namei.c |
| 49 | */ | 49 | */ |
| 50 | extern int __inode_permission(struct inode *, int); | ||
| 51 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); | 50 | extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); |
| 52 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, | 51 | extern int vfs_path_lookup(struct dentry *, struct vfsmount *, |
| 53 | const char *, unsigned int, struct path *); | 52 | const char *, unsigned int, struct path *); |
| @@ -139,12 +138,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, | |||
| 139 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); | 138 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); |
| 140 | 139 | ||
| 141 | /* | 140 | /* |
| 142 | * splice.c | ||
| 143 | */ | ||
| 144 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||
| 145 | loff_t *opos, size_t len, unsigned int flags); | ||
| 146 | |||
| 147 | /* | ||
| 148 | * pipe.c | 141 | * pipe.c |
| 149 | */ | 142 | */ |
| 150 | extern const struct file_operations pipefifo_fops; | 143 | extern const struct file_operations pipefifo_fops; |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 881b3bd0143f..d67a16f2a45d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
| @@ -29,13 +29,9 @@ | |||
| 29 | #define BEQUIET | 29 | #define BEQUIET |
| 30 | 30 | ||
| 31 | static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); | 31 | static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); |
| 32 | static int isofs_hash(const struct dentry *parent, struct qstr *qstr); | ||
| 33 | static int isofs_dentry_cmpi(const struct dentry *parent, | 32 | static int isofs_dentry_cmpi(const struct dentry *parent, |
| 34 | const struct dentry *dentry, | 33 | const struct dentry *dentry, |
| 35 | unsigned int len, const char *str, const struct qstr *name); | 34 | unsigned int len, const char *str, const struct qstr *name); |
| 36 | static int isofs_dentry_cmp(const struct dentry *parent, | ||
| 37 | const struct dentry *dentry, | ||
| 38 | unsigned int len, const char *str, const struct qstr *name); | ||
| 39 | 35 | ||
| 40 | #ifdef CONFIG_JOLIET | 36 | #ifdef CONFIG_JOLIET |
| 41 | static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); | 37 | static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); |
| @@ -135,10 +131,6 @@ static const struct super_operations isofs_sops = { | |||
| 135 | 131 | ||
| 136 | static const struct dentry_operations isofs_dentry_ops[] = { | 132 | static const struct dentry_operations isofs_dentry_ops[] = { |
| 137 | { | 133 | { |
| 138 | .d_hash = isofs_hash, | ||
| 139 | .d_compare = isofs_dentry_cmp, | ||
| 140 | }, | ||
| 141 | { | ||
| 142 | .d_hash = isofs_hashi, | 134 | .d_hash = isofs_hashi, |
| 143 | .d_compare = isofs_dentry_cmpi, | 135 | .d_compare = isofs_dentry_cmpi, |
| 144 | }, | 136 | }, |
| @@ -182,27 +174,6 @@ struct iso9660_options{ | |||
| 182 | * Compute the hash for the isofs name corresponding to the dentry. | 174 | * Compute the hash for the isofs name corresponding to the dentry. |
| 183 | */ | 175 | */ |
| 184 | static int | 176 | static int |
| 185 | isofs_hash_common(struct qstr *qstr, int ms) | ||
| 186 | { | ||
| 187 | const char *name; | ||
| 188 | int len; | ||
| 189 | |||
| 190 | len = qstr->len; | ||
| 191 | name = qstr->name; | ||
| 192 | if (ms) { | ||
| 193 | while (len && name[len-1] == '.') | ||
| 194 | len--; | ||
| 195 | } | ||
| 196 | |||
| 197 | qstr->hash = full_name_hash(name, len); | ||
| 198 | |||
| 199 | return 0; | ||
| 200 | } | ||
| 201 | |||
| 202 | /* | ||
| 203 | * Compute the hash for the isofs name corresponding to the dentry. | ||
| 204 | */ | ||
| 205 | static int | ||
| 206 | isofs_hashi_common(struct qstr *qstr, int ms) | 177 | isofs_hashi_common(struct qstr *qstr, int ms) |
| 207 | { | 178 | { |
| 208 | const char *name; | 179 | const char *name; |
| @@ -258,32 +229,40 @@ static int isofs_dentry_cmp_common( | |||
| 258 | } | 229 | } |
| 259 | 230 | ||
| 260 | static int | 231 | static int |
| 261 | isofs_hash(const struct dentry *dentry, struct qstr *qstr) | ||
| 262 | { | ||
| 263 | return isofs_hash_common(qstr, 0); | ||
| 264 | } | ||
| 265 | |||
| 266 | static int | ||
| 267 | isofs_hashi(const struct dentry *dentry, struct qstr *qstr) | 232 | isofs_hashi(const struct dentry *dentry, struct qstr *qstr) |
| 268 | { | 233 | { |
| 269 | return isofs_hashi_common(qstr, 0); | 234 | return isofs_hashi_common(qstr, 0); |
| 270 | } | 235 | } |
| 271 | 236 | ||
| 272 | static int | 237 | static int |
| 273 | isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry, | 238 | isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, |
| 274 | unsigned int len, const char *str, const struct qstr *name) | 239 | unsigned int len, const char *str, const struct qstr *name) |
| 275 | { | 240 | { |
| 276 | return isofs_dentry_cmp_common(len, str, name, 0, 0); | 241 | return isofs_dentry_cmp_common(len, str, name, 0, 1); |
| 277 | } | 242 | } |
| 278 | 243 | ||
| 244 | #ifdef CONFIG_JOLIET | ||
| 245 | /* | ||
| 246 | * Compute the hash for the isofs name corresponding to the dentry. | ||
| 247 | */ | ||
| 279 | static int | 248 | static int |
| 280 | isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, | 249 | isofs_hash_common(struct qstr *qstr, int ms) |
| 281 | unsigned int len, const char *str, const struct qstr *name) | ||
| 282 | { | 250 | { |
| 283 | return isofs_dentry_cmp_common(len, str, name, 0, 1); | 251 | const char *name; |
| 252 | int len; | ||
| 253 | |||
| 254 | len = qstr->len; | ||
| 255 | name = qstr->name; | ||
| 256 | if (ms) { | ||
| 257 | while (len && name[len-1] == '.') | ||
| 258 | len--; | ||
| 259 | } | ||
| 260 | |||
| 261 | qstr->hash = full_name_hash(name, len); | ||
| 262 | |||
| 263 | return 0; | ||
| 284 | } | 264 | } |
| 285 | 265 | ||
| 286 | #ifdef CONFIG_JOLIET | ||
| 287 | static int | 266 | static int |
| 288 | isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) | 267 | isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) |
| 289 | { | 268 | { |
| @@ -930,7 +909,8 @@ root_found: | |||
| 930 | if (opt.check == 'r') | 909 | if (opt.check == 'r') |
| 931 | table++; | 910 | table++; |
| 932 | 911 | ||
| 933 | s->s_d_op = &isofs_dentry_ops[table]; | 912 | if (table) |
| 913 | s->s_d_op = &isofs_dentry_ops[table - 1]; | ||
| 934 | 914 | ||
| 935 | /* get the root dentry */ | 915 | /* get the root dentry */ |
| 936 | s->s_root = d_make_root(inode); | 916 | s->s_root = d_make_root(inode); |
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 95295640d9c8..7b543e6b6526 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c | |||
| @@ -18,25 +18,10 @@ static int | |||
| 18 | isofs_cmp(struct dentry *dentry, const char *compare, int dlen) | 18 | isofs_cmp(struct dentry *dentry, const char *compare, int dlen) |
| 19 | { | 19 | { |
| 20 | struct qstr qstr; | 20 | struct qstr qstr; |
| 21 | |||
| 22 | if (!compare) | ||
| 23 | return 1; | ||
| 24 | |||
| 25 | /* check special "." and ".." files */ | ||
| 26 | if (dlen == 1) { | ||
| 27 | /* "." */ | ||
| 28 | if (compare[0] == 0) { | ||
| 29 | if (!dentry->d_name.len) | ||
| 30 | return 0; | ||
| 31 | compare = "."; | ||
| 32 | } else if (compare[0] == 1) { | ||
| 33 | compare = ".."; | ||
| 34 | dlen = 2; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | qstr.name = compare; | 21 | qstr.name = compare; |
| 39 | qstr.len = dlen; | 22 | qstr.len = dlen; |
| 23 | if (likely(!dentry->d_op)) | ||
| 24 | return dentry->d_name.len != dlen || memcmp(dentry->d_name.name, compare, dlen); | ||
| 40 | return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); | 25 | return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); |
| 41 | } | 26 | } |
| 42 | 27 | ||
| @@ -146,7 +131,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, | |||
| 146 | (!(de->flags[-sbi->s_high_sierra] & 1))) && | 131 | (!(de->flags[-sbi->s_high_sierra] & 1))) && |
| 147 | (sbi->s_showassoc || | 132 | (sbi->s_showassoc || |
| 148 | (!(de->flags[-sbi->s_high_sierra] & 4)))) { | 133 | (!(de->flags[-sbi->s_high_sierra] & 4)))) { |
| 149 | match = (isofs_cmp(dentry, dpnt, dlen) == 0); | 134 | if (dpnt && (dlen > 1 || dpnt[0] > 1)) |
| 135 | match = (isofs_cmp(dentry, dpnt, dlen) == 0); | ||
| 150 | } | 136 | } |
| 151 | if (match) { | 137 | if (match) { |
| 152 | isofs_normalize_block_and_offset(de, | 138 | isofs_normalize_block_and_offset(de, |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 06fe11e0abfa..aab8549591e7 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
| @@ -886,7 +886,7 @@ journal_t * journal_init_inode (struct inode *inode) | |||
| 886 | goto out_err; | 886 | goto out_err; |
| 887 | } | 887 | } |
| 888 | 888 | ||
| 889 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 889 | bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); |
| 890 | if (!bh) { | 890 | if (!bh) { |
| 891 | printk(KERN_ERR | 891 | printk(KERN_ERR |
| 892 | "%s: Cannot get buffer for journal superblock\n", | 892 | "%s: Cannot get buffer for journal superblock\n", |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 8898bbd2b61e..dcead636c33b 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
| @@ -93,6 +93,7 @@ | |||
| 93 | #include <linux/bio.h> | 93 | #include <linux/bio.h> |
| 94 | #endif | 94 | #endif |
| 95 | #include <linux/log2.h> | 95 | #include <linux/log2.h> |
| 96 | #include <linux/hash.h> | ||
| 96 | 97 | ||
| 97 | static struct kmem_cache *revoke_record_cache; | 98 | static struct kmem_cache *revoke_record_cache; |
| 98 | static struct kmem_cache *revoke_table_cache; | 99 | static struct kmem_cache *revoke_table_cache; |
| @@ -129,15 +130,11 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int); | |||
| 129 | 130 | ||
| 130 | /* Utility functions to maintain the revoke table */ | 131 | /* Utility functions to maintain the revoke table */ |
| 131 | 132 | ||
| 132 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ | ||
| 133 | static inline int hash(journal_t *journal, unsigned int block) | 133 | static inline int hash(journal_t *journal, unsigned int block) |
| 134 | { | 134 | { |
| 135 | struct jbd_revoke_table_s *table = journal->j_revoke; | 135 | struct jbd_revoke_table_s *table = journal->j_revoke; |
| 136 | int hash_shift = table->hash_shift; | ||
| 137 | 136 | ||
| 138 | return ((block << (hash_shift - 6)) ^ | 137 | return hash_32(block, table->hash_shift); |
| 139 | (block >> 13) ^ | ||
| 140 | (block << (hash_shift - 12))) & (table->hash_size - 1); | ||
| 141 | } | 138 | } |
| 142 | 139 | ||
| 143 | static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, | 140 | static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 7f34f4716165..988b32ed4c87 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
| 96 | 96 | ||
| 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && | 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && |
| 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
| 99 | /* | ||
| 100 | * Get our reference so that bh cannot be freed before | ||
| 101 | * we unlock it | ||
| 102 | */ | ||
| 103 | get_bh(bh); | ||
| 104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
| 105 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
| 106 | BUFFER_TRACE(bh, "release"); | ||
| 107 | __brelse(bh); | ||
| 108 | } | 101 | } |
| 109 | return ret; | 102 | return ret; |
| 110 | } | 103 | } |
| @@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 122 | 115 | ||
| 123 | nblocks = jbd2_space_needed(journal); | 116 | nblocks = jbd2_space_needed(journal); |
| 124 | while (jbd2_log_space_left(journal) < nblocks) { | 117 | while (jbd2_log_space_left(journal) < nblocks) { |
| 125 | if (journal->j_flags & JBD2_ABORT) | ||
| 126 | return; | ||
| 127 | write_unlock(&journal->j_state_lock); | 118 | write_unlock(&journal->j_state_lock); |
| 128 | mutex_lock(&journal->j_checkpoint_mutex); | 119 | mutex_lock(&journal->j_checkpoint_mutex); |
| 129 | 120 | ||
| @@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 139 | * trace for forensic evidence. | 130 | * trace for forensic evidence. |
| 140 | */ | 131 | */ |
| 141 | write_lock(&journal->j_state_lock); | 132 | write_lock(&journal->j_state_lock); |
| 133 | if (journal->j_flags & JBD2_ABORT) { | ||
| 134 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
| 135 | return; | ||
| 136 | } | ||
| 142 | spin_lock(&journal->j_list_lock); | 137 | spin_lock(&journal->j_list_lock); |
| 143 | nblocks = jbd2_space_needed(journal); | 138 | nblocks = jbd2_space_needed(journal); |
| 144 | space_left = jbd2_log_space_left(journal); | 139 | space_left = jbd2_log_space_left(journal); |
| @@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 183 | } | 178 | } |
| 184 | } | 179 | } |
| 185 | 180 | ||
| 186 | /* | ||
| 187 | * Clean up transaction's list of buffers submitted for io. | ||
| 188 | * We wait for any pending IO to complete and remove any clean | ||
| 189 | * buffers. Note that we take the buffers in the opposite ordering | ||
| 190 | * from the one in which they were submitted for IO. | ||
| 191 | * | ||
| 192 | * Return 0 on success, and return <0 if some buffers have failed | ||
| 193 | * to be written out. | ||
| 194 | * | ||
| 195 | * Called with j_list_lock held. | ||
| 196 | */ | ||
| 197 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) | ||
| 198 | { | ||
| 199 | struct journal_head *jh; | ||
| 200 | struct buffer_head *bh; | ||
| 201 | tid_t this_tid; | ||
| 202 | int released = 0; | ||
| 203 | int ret = 0; | ||
| 204 | |||
| 205 | this_tid = transaction->t_tid; | ||
| 206 | restart: | ||
| 207 | /* Did somebody clean up the transaction in the meanwhile? */ | ||
| 208 | if (journal->j_checkpoint_transactions != transaction || | ||
| 209 | transaction->t_tid != this_tid) | ||
| 210 | return ret; | ||
| 211 | while (!released && transaction->t_checkpoint_io_list) { | ||
| 212 | jh = transaction->t_checkpoint_io_list; | ||
| 213 | bh = jh2bh(jh); | ||
| 214 | get_bh(bh); | ||
| 215 | if (buffer_locked(bh)) { | ||
| 216 | spin_unlock(&journal->j_list_lock); | ||
| 217 | wait_on_buffer(bh); | ||
| 218 | /* the journal_head may have gone by now */ | ||
| 219 | BUFFER_TRACE(bh, "brelse"); | ||
| 220 | __brelse(bh); | ||
| 221 | spin_lock(&journal->j_list_lock); | ||
| 222 | goto restart; | ||
| 223 | } | ||
| 224 | if (unlikely(buffer_write_io_error(bh))) | ||
| 225 | ret = -EIO; | ||
| 226 | |||
| 227 | /* | ||
| 228 | * Now in whatever state the buffer currently is, we know that | ||
| 229 | * it has been written out and so we can drop it from the list | ||
| 230 | */ | ||
| 231 | released = __jbd2_journal_remove_checkpoint(jh); | ||
| 232 | __brelse(bh); | ||
| 233 | } | ||
| 234 | |||
| 235 | return ret; | ||
| 236 | } | ||
| 237 | |||
| 238 | static void | 181 | static void |
| 239 | __flush_batch(journal_t *journal, int *batch_count) | 182 | __flush_batch(journal_t *journal, int *batch_count) |
| 240 | { | 183 | { |
| @@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
| 255 | } | 198 | } |
| 256 | 199 | ||
| 257 | /* | 200 | /* |
| 258 | * Try to flush one buffer from the checkpoint list to disk. | ||
| 259 | * | ||
| 260 | * Return 1 if something happened which requires us to abort the current | ||
| 261 | * scan of the checkpoint list. Return <0 if the buffer has failed to | ||
| 262 | * be written out. | ||
| 263 | * | ||
| 264 | * Called with j_list_lock held and drops it if 1 is returned | ||
| 265 | */ | ||
| 266 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | ||
| 267 | int *batch_count, transaction_t *transaction) | ||
| 268 | { | ||
| 269 | struct buffer_head *bh = jh2bh(jh); | ||
| 270 | int ret = 0; | ||
| 271 | |||
| 272 | if (buffer_locked(bh)) { | ||
| 273 | get_bh(bh); | ||
| 274 | spin_unlock(&journal->j_list_lock); | ||
| 275 | wait_on_buffer(bh); | ||
| 276 | /* the journal_head may have gone by now */ | ||
| 277 | BUFFER_TRACE(bh, "brelse"); | ||
| 278 | __brelse(bh); | ||
| 279 | ret = 1; | ||
| 280 | } else if (jh->b_transaction != NULL) { | ||
| 281 | transaction_t *t = jh->b_transaction; | ||
| 282 | tid_t tid = t->t_tid; | ||
| 283 | |||
| 284 | transaction->t_chp_stats.cs_forced_to_close++; | ||
| 285 | spin_unlock(&journal->j_list_lock); | ||
| 286 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | ||
| 287 | /* | ||
| 288 | * The journal thread is dead; so starting and | ||
| 289 | * waiting for a commit to finish will cause | ||
| 290 | * us to wait for a _very_ long time. | ||
| 291 | */ | ||
| 292 | printk(KERN_ERR "JBD2: %s: " | ||
| 293 | "Waiting for Godot: block %llu\n", | ||
| 294 | journal->j_devname, | ||
| 295 | (unsigned long long) bh->b_blocknr); | ||
| 296 | jbd2_log_start_commit(journal, tid); | ||
| 297 | jbd2_log_wait_commit(journal, tid); | ||
| 298 | ret = 1; | ||
| 299 | } else if (!buffer_dirty(bh)) { | ||
| 300 | ret = 1; | ||
| 301 | if (unlikely(buffer_write_io_error(bh))) | ||
| 302 | ret = -EIO; | ||
| 303 | get_bh(bh); | ||
| 304 | BUFFER_TRACE(bh, "remove from checkpoint"); | ||
| 305 | __jbd2_journal_remove_checkpoint(jh); | ||
| 306 | spin_unlock(&journal->j_list_lock); | ||
| 307 | __brelse(bh); | ||
| 308 | } else { | ||
| 309 | /* | ||
| 310 | * Important: we are about to write the buffer, and | ||
| 311 | * possibly block, while still holding the journal lock. | ||
| 312 | * We cannot afford to let the transaction logic start | ||
| 313 | * messing around with this buffer before we write it to | ||
| 314 | * disk, as that would break recoverability. | ||
| 315 | */ | ||
| 316 | BUFFER_TRACE(bh, "queue"); | ||
| 317 | get_bh(bh); | ||
| 318 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | ||
| 319 | journal->j_chkpt_bhs[*batch_count] = bh; | ||
| 320 | __buffer_relink_io(jh); | ||
| 321 | transaction->t_chp_stats.cs_written++; | ||
| 322 | (*batch_count)++; | ||
| 323 | if (*batch_count == JBD2_NR_BATCH) { | ||
| 324 | spin_unlock(&journal->j_list_lock); | ||
| 325 | __flush_batch(journal, batch_count); | ||
| 326 | ret = 1; | ||
| 327 | } | ||
| 328 | } | ||
| 329 | return ret; | ||
| 330 | } | ||
| 331 | |||
| 332 | /* | ||
| 333 | * Perform an actual checkpoint. We take the first transaction on the | 201 | * Perform an actual checkpoint. We take the first transaction on the |
| 334 | * list of transactions to be checkpointed and send all its buffers | 202 | * list of transactions to be checkpointed and send all its buffers |
| 335 | * to disk. We submit larger chunks of data at once. | 203 | * to disk. We submit larger chunks of data at once. |
| @@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
| 339 | */ | 207 | */ |
| 340 | int jbd2_log_do_checkpoint(journal_t *journal) | 208 | int jbd2_log_do_checkpoint(journal_t *journal) |
| 341 | { | 209 | { |
| 342 | transaction_t *transaction; | 210 | struct journal_head *jh; |
| 343 | tid_t this_tid; | 211 | struct buffer_head *bh; |
| 344 | int result; | 212 | transaction_t *transaction; |
| 213 | tid_t this_tid; | ||
| 214 | int result, batch_count = 0; | ||
| 345 | 215 | ||
| 346 | jbd_debug(1, "Start checkpoint\n"); | 216 | jbd_debug(1, "Start checkpoint\n"); |
| 347 | 217 | ||
| @@ -374,45 +244,117 @@ restart: | |||
| 374 | * done (maybe it's a new transaction, but it fell at the same | 244 | * done (maybe it's a new transaction, but it fell at the same |
| 375 | * address). | 245 | * address). |
| 376 | */ | 246 | */ |
| 377 | if (journal->j_checkpoint_transactions == transaction && | 247 | if (journal->j_checkpoint_transactions != transaction || |
| 378 | transaction->t_tid == this_tid) { | 248 | transaction->t_tid != this_tid) |
| 379 | int batch_count = 0; | 249 | goto out; |
| 380 | struct journal_head *jh; | 250 | |
| 381 | int retry = 0, err; | 251 | /* checkpoint all of the transaction's buffers */ |
| 382 | 252 | while (transaction->t_checkpoint_list) { | |
| 383 | while (!retry && transaction->t_checkpoint_list) { | 253 | jh = transaction->t_checkpoint_list; |
| 384 | jh = transaction->t_checkpoint_list; | 254 | bh = jh2bh(jh); |
| 385 | retry = __process_buffer(journal, jh, &batch_count, | 255 | |
| 386 | transaction); | 256 | if (buffer_locked(bh)) { |
| 387 | if (retry < 0 && !result) | 257 | spin_unlock(&journal->j_list_lock); |
| 388 | result = retry; | 258 | get_bh(bh); |
| 389 | if (!retry && (need_resched() || | 259 | wait_on_buffer(bh); |
| 390 | spin_needbreak(&journal->j_list_lock))) { | 260 | /* the journal_head may have gone by now */ |
| 391 | spin_unlock(&journal->j_list_lock); | 261 | BUFFER_TRACE(bh, "brelse"); |
| 392 | retry = 1; | 262 | __brelse(bh); |
| 393 | break; | 263 | goto retry; |
| 394 | } | ||
| 395 | } | 264 | } |
| 265 | if (jh->b_transaction != NULL) { | ||
| 266 | transaction_t *t = jh->b_transaction; | ||
| 267 | tid_t tid = t->t_tid; | ||
| 396 | 268 | ||
| 397 | if (batch_count) { | 269 | transaction->t_chp_stats.cs_forced_to_close++; |
| 398 | if (!retry) { | 270 | spin_unlock(&journal->j_list_lock); |
| 399 | spin_unlock(&journal->j_list_lock); | 271 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) |
| 400 | retry = 1; | 272 | /* |
| 401 | } | 273 | * The journal thread is dead; so |
| 402 | __flush_batch(journal, &batch_count); | 274 | * starting and waiting for a commit |
| 275 | * to finish will cause us to wait for | ||
| 276 | * a _very_ long time. | ||
| 277 | */ | ||
| 278 | printk(KERN_ERR | ||
| 279 | "JBD2: %s: Waiting for Godot: block %llu\n", | ||
| 280 | journal->j_devname, (unsigned long long) bh->b_blocknr); | ||
| 281 | |||
| 282 | jbd2_log_start_commit(journal, tid); | ||
| 283 | jbd2_log_wait_commit(journal, tid); | ||
| 284 | goto retry; | ||
| 285 | } | ||
| 286 | if (!buffer_dirty(bh)) { | ||
| 287 | if (unlikely(buffer_write_io_error(bh)) && !result) | ||
| 288 | result = -EIO; | ||
| 289 | BUFFER_TRACE(bh, "remove from checkpoint"); | ||
| 290 | if (__jbd2_journal_remove_checkpoint(jh)) | ||
| 291 | /* The transaction was released; we're done */ | ||
| 292 | goto out; | ||
| 293 | continue; | ||
| 403 | } | 294 | } |
| 295 | /* | ||
| 296 | * Important: we are about to write the buffer, and | ||
| 297 | * possibly block, while still holding the journal | ||
| 298 | * lock. We cannot afford to let the transaction | ||
| 299 | * logic start messing around with this buffer before | ||
| 300 | * we write it to disk, as that would break | ||
| 301 | * recoverability. | ||
| 302 | */ | ||
| 303 | BUFFER_TRACE(bh, "queue"); | ||
| 304 | get_bh(bh); | ||
| 305 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | ||
| 306 | journal->j_chkpt_bhs[batch_count++] = bh; | ||
| 307 | __buffer_relink_io(jh); | ||
| 308 | transaction->t_chp_stats.cs_written++; | ||
| 309 | if ((batch_count == JBD2_NR_BATCH) || | ||
| 310 | need_resched() || | ||
| 311 | spin_needbreak(&journal->j_list_lock)) | ||
| 312 | goto unlock_and_flush; | ||
| 313 | } | ||
| 404 | 314 | ||
| 405 | if (retry) { | 315 | if (batch_count) { |
| 316 | unlock_and_flush: | ||
| 317 | spin_unlock(&journal->j_list_lock); | ||
| 318 | retry: | ||
| 319 | if (batch_count) | ||
| 320 | __flush_batch(journal, &batch_count); | ||
| 406 | spin_lock(&journal->j_list_lock); | 321 | spin_lock(&journal->j_list_lock); |
| 407 | goto restart; | 322 | goto restart; |
| 323 | } | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Now we issued all of the transaction's buffers, let's deal | ||
| 327 | * with the buffers that are out for I/O. | ||
| 328 | */ | ||
| 329 | restart2: | ||
| 330 | /* Did somebody clean up the transaction in the meanwhile? */ | ||
| 331 | if (journal->j_checkpoint_transactions != transaction || | ||
| 332 | transaction->t_tid != this_tid) | ||
| 333 | goto out; | ||
| 334 | |||
| 335 | while (transaction->t_checkpoint_io_list) { | ||
| 336 | jh = transaction->t_checkpoint_io_list; | ||
| 337 | bh = jh2bh(jh); | ||
| 338 | if (buffer_locked(bh)) { | ||
| 339 | spin_unlock(&journal->j_list_lock); | ||
| 340 | get_bh(bh); | ||
| 341 | wait_on_buffer(bh); | ||
| 342 | /* the journal_head may have gone by now */ | ||
| 343 | BUFFER_TRACE(bh, "brelse"); | ||
| 344 | __brelse(bh); | ||
| 345 | spin_lock(&journal->j_list_lock); | ||
| 346 | goto restart2; | ||
| 408 | } | 347 | } |
| 348 | if (unlikely(buffer_write_io_error(bh)) && !result) | ||
| 349 | result = -EIO; | ||
| 350 | |||
| 409 | /* | 351 | /* |
| 410 | * Now we have cleaned up the first transaction's checkpoint | 352 | * Now in whatever state the buffer currently is, we |
| 411 | * list. Let's clean up the second one | 353 | * know that it has been written out and so we can |
| 354 | * drop it from the list | ||
| 412 | */ | 355 | */ |
| 413 | err = __wait_cp_io(journal, transaction); | 356 | if (__jbd2_journal_remove_checkpoint(jh)) |
| 414 | if (!result) | 357 | break; |
| 415 | result = err; | ||
| 416 | } | 358 | } |
| 417 | out: | 359 | out: |
| 418 | spin_unlock(&journal->j_list_lock); | 360 | spin_unlock(&journal->j_list_lock); |
| @@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
| 478 | * Find all the written-back checkpoint buffers in the given list and | 420 | * Find all the written-back checkpoint buffers in the given list and |
| 479 | * release them. | 421 | * release them. |
| 480 | * | 422 | * |
| 481 | * Called with the journal locked. | ||
| 482 | * Called with j_list_lock held. | 423 | * Called with j_list_lock held. |
| 483 | * Returns number of buffers reaped (for debug) | 424 | * Returns 1 if we freed the transaction, 0 otherwise. |
| 484 | */ | 425 | */ |
| 485 | 426 | static int journal_clean_one_cp_list(struct journal_head *jh) | |
| 486 | static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | ||
| 487 | { | 427 | { |
| 488 | struct journal_head *last_jh; | 428 | struct journal_head *last_jh; |
| 489 | struct journal_head *next_jh = jh; | 429 | struct journal_head *next_jh = jh; |
| 490 | int ret, freed = 0; | 430 | int ret; |
| 431 | int freed = 0; | ||
| 491 | 432 | ||
| 492 | *released = 0; | ||
| 493 | if (!jh) | 433 | if (!jh) |
| 494 | return 0; | 434 | return 0; |
| 495 | 435 | ||
| @@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
| 498 | jh = next_jh; | 438 | jh = next_jh; |
| 499 | next_jh = jh->b_cpnext; | 439 | next_jh = jh->b_cpnext; |
| 500 | ret = __try_to_free_cp_buf(jh); | 440 | ret = __try_to_free_cp_buf(jh); |
| 501 | if (ret) { | 441 | if (!ret) |
| 502 | freed++; | 442 | return freed; |
| 503 | if (ret == 2) { | 443 | if (ret == 2) |
| 504 | *released = 1; | 444 | return 1; |
| 505 | return freed; | 445 | freed = 1; |
| 506 | } | ||
| 507 | } | ||
| 508 | /* | 446 | /* |
| 509 | * This function only frees up some memory | 447 | * This function only frees up some memory |
| 510 | * if possible so we dont have an obligation | 448 | * if possible so we dont have an obligation |
| @@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
| 523 | * | 461 | * |
| 524 | * Find all the written-back checkpoint buffers in the journal and release them. | 462 | * Find all the written-back checkpoint buffers in the journal and release them. |
| 525 | * | 463 | * |
| 526 | * Called with the journal locked. | ||
| 527 | * Called with j_list_lock held. | 464 | * Called with j_list_lock held. |
| 528 | * Returns number of buffers reaped (for debug) | ||
| 529 | */ | 465 | */ |
| 530 | 466 | void __jbd2_journal_clean_checkpoint_list(journal_t *journal) | |
| 531 | int __jbd2_journal_clean_checkpoint_list(journal_t *journal) | ||
| 532 | { | 467 | { |
| 533 | transaction_t *transaction, *last_transaction, *next_transaction; | 468 | transaction_t *transaction, *last_transaction, *next_transaction; |
| 534 | int ret = 0; | 469 | int ret; |
| 535 | int released; | ||
| 536 | 470 | ||
| 537 | transaction = journal->j_checkpoint_transactions; | 471 | transaction = journal->j_checkpoint_transactions; |
| 538 | if (!transaction) | 472 | if (!transaction) |
| 539 | goto out; | 473 | return; |
| 540 | 474 | ||
| 541 | last_transaction = transaction->t_cpprev; | 475 | last_transaction = transaction->t_cpprev; |
| 542 | next_transaction = transaction; | 476 | next_transaction = transaction; |
| 543 | do { | 477 | do { |
| 544 | transaction = next_transaction; | 478 | transaction = next_transaction; |
| 545 | next_transaction = transaction->t_cpnext; | 479 | next_transaction = transaction->t_cpnext; |
| 546 | ret += journal_clean_one_cp_list(transaction-> | 480 | ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); |
| 547 | t_checkpoint_list, &released); | ||
| 548 | /* | 481 | /* |
| 549 | * This function only frees up some memory if possible so we | 482 | * This function only frees up some memory if possible so we |
| 550 | * dont have an obligation to finish processing. Bail out if | 483 | * dont have an obligation to finish processing. Bail out if |
| 551 | * preemption requested: | 484 | * preemption requested: |
| 552 | */ | 485 | */ |
| 553 | if (need_resched()) | 486 | if (need_resched()) |
| 554 | goto out; | 487 | return; |
| 555 | if (released) | 488 | if (ret) |
| 556 | continue; | 489 | continue; |
| 557 | /* | 490 | /* |
| 558 | * It is essential that we are as careful as in the case of | 491 | * It is essential that we are as careful as in the case of |
| 559 | * t_checkpoint_list with removing the buffer from the list as | 492 | * t_checkpoint_list with removing the buffer from the list as |
| 560 | * we can possibly see not yet submitted buffers on io_list | 493 | * we can possibly see not yet submitted buffers on io_list |
| 561 | */ | 494 | */ |
| 562 | ret += journal_clean_one_cp_list(transaction-> | 495 | ret = journal_clean_one_cp_list(transaction-> |
| 563 | t_checkpoint_io_list, &released); | 496 | t_checkpoint_io_list); |
| 564 | if (need_resched()) | 497 | if (need_resched()) |
| 565 | goto out; | 498 | return; |
| 499 | /* | ||
| 500 | * Stop scanning if we couldn't free the transaction. This | ||
| 501 | * avoids pointless scanning of transactions which still | ||
| 502 | * weren't checkpointed. | ||
| 503 | */ | ||
| 504 | if (!ret) | ||
| 505 | return; | ||
| 566 | } while (transaction != last_transaction); | 506 | } while (transaction != last_transaction); |
| 567 | out: | ||
| 568 | return ret; | ||
| 569 | } | 507 | } |
| 570 | 508 | ||
| 571 | /* | 509 | /* |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 19d74d86d99c..1df94fabe4eb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -1237,7 +1237,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
| 1237 | goto out_err; | 1237 | goto out_err; |
| 1238 | } | 1238 | } |
| 1239 | 1239 | ||
| 1240 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 1240 | bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); |
| 1241 | if (!bh) { | 1241 | if (!bh) { |
| 1242 | printk(KERN_ERR | 1242 | printk(KERN_ERR |
| 1243 | "%s: Cannot get buffer for journal superblock\n", | 1243 | "%s: Cannot get buffer for journal superblock\n", |
| @@ -1522,14 +1522,6 @@ static int journal_get_superblock(journal_t *journal) | |||
| 1522 | goto out; | 1522 | goto out; |
| 1523 | } | 1523 | } |
| 1524 | 1524 | ||
| 1525 | if (jbd2_journal_has_csum_v2or3(journal) && | ||
| 1526 | JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
| 1527 | /* Can't have checksum v1 and v2 on at the same time! */ | ||
| 1528 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " | ||
| 1529 | "at the same time!\n"); | ||
| 1530 | goto out; | ||
| 1531 | } | ||
| 1532 | |||
| 1533 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && | 1525 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && |
| 1534 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { | 1526 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { |
| 1535 | /* Can't have checksum v2 and v3 at the same time! */ | 1527 | /* Can't have checksum v2 and v3 at the same time! */ |
| @@ -1538,6 +1530,14 @@ static int journal_get_superblock(journal_t *journal) | |||
| 1538 | goto out; | 1530 | goto out; |
| 1539 | } | 1531 | } |
| 1540 | 1532 | ||
| 1533 | if (jbd2_journal_has_csum_v2or3(journal) && | ||
| 1534 | JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
| 1535 | /* Can't have checksum v1 and v2 on at the same time! */ | ||
| 1536 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " | ||
| 1537 | "at the same time!\n"); | ||
| 1538 | goto out; | ||
| 1539 | } | ||
| 1540 | |||
| 1541 | if (!jbd2_verify_csum_type(journal, sb)) { | 1541 | if (!jbd2_verify_csum_type(journal, sb)) { |
| 1542 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); | 1542 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); |
| 1543 | goto out; | 1543 | goto out; |
| @@ -1853,13 +1853,12 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
| 1853 | journal->j_chksum_driver = NULL; | 1853 | journal->j_chksum_driver = NULL; |
| 1854 | return 0; | 1854 | return 0; |
| 1855 | } | 1855 | } |
| 1856 | } | ||
| 1857 | 1856 | ||
| 1858 | /* Precompute checksum seed for all metadata */ | 1857 | /* Precompute checksum seed for all metadata */ |
| 1859 | if (jbd2_journal_has_csum_v2or3(journal)) | ||
| 1860 | journal->j_csum_seed = jbd2_chksum(journal, ~0, | 1858 | journal->j_csum_seed = jbd2_chksum(journal, ~0, |
| 1861 | sb->s_uuid, | 1859 | sb->s_uuid, |
| 1862 | sizeof(sb->s_uuid)); | 1860 | sizeof(sb->s_uuid)); |
| 1861 | } | ||
| 1863 | } | 1862 | } |
| 1864 | 1863 | ||
| 1865 | /* If enabling v1 checksums, downgrade superblock */ | 1864 | /* If enabling v1 checksums, downgrade superblock */ |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 9b329b55ffe3..bcbef08a4d8f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
| @@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal, | |||
| 525 | !jbd2_descr_block_csum_verify(journal, | 525 | !jbd2_descr_block_csum_verify(journal, |
| 526 | bh->b_data)) { | 526 | bh->b_data)) { |
| 527 | err = -EIO; | 527 | err = -EIO; |
| 528 | brelse(bh); | ||
| 528 | goto failed; | 529 | goto failed; |
| 529 | } | 530 | } |
| 530 | 531 | ||
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index d5e95a175c92..c6cbaef2bda1 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
| @@ -92,6 +92,7 @@ | |||
| 92 | #include <linux/init.h> | 92 | #include <linux/init.h> |
| 93 | #include <linux/bio.h> | 93 | #include <linux/bio.h> |
| 94 | #include <linux/log2.h> | 94 | #include <linux/log2.h> |
| 95 | #include <linux/hash.h> | ||
| 95 | #endif | 96 | #endif |
| 96 | 97 | ||
| 97 | static struct kmem_cache *jbd2_revoke_record_cache; | 98 | static struct kmem_cache *jbd2_revoke_record_cache; |
| @@ -130,16 +131,9 @@ static void flush_descriptor(journal_t *, struct buffer_head *, int, int); | |||
| 130 | 131 | ||
| 131 | /* Utility functions to maintain the revoke table */ | 132 | /* Utility functions to maintain the revoke table */ |
| 132 | 133 | ||
| 133 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ | ||
| 134 | static inline int hash(journal_t *journal, unsigned long long block) | 134 | static inline int hash(journal_t *journal, unsigned long long block) |
| 135 | { | 135 | { |
| 136 | struct jbd2_revoke_table_s *table = journal->j_revoke; | 136 | return hash_64(block, journal->j_revoke->hash_shift); |
| 137 | int hash_shift = table->hash_shift; | ||
| 138 | int hash = (int)block ^ (int)((block >> 31) >> 1); | ||
| 139 | |||
| 140 | return ((hash << (hash_shift - 6)) ^ | ||
| 141 | (hash >> 13) ^ | ||
| 142 | (hash << (hash_shift - 12))) & (table->hash_size - 1); | ||
| 143 | } | 137 | } |
| 144 | 138 | ||
| 145 | static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, | 139 | static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, |
diff --git a/fs/namei.c b/fs/namei.c index 43927d14db67..db5fe86319e6 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -416,6 +416,7 @@ int __inode_permission(struct inode *inode, int mask) | |||
| 416 | 416 | ||
| 417 | return security_inode_permission(inode, mask); | 417 | return security_inode_permission(inode, mask); |
| 418 | } | 418 | } |
| 419 | EXPORT_SYMBOL(__inode_permission); | ||
| 419 | 420 | ||
| 420 | /** | 421 | /** |
| 421 | * sb_permission - Check superblock-level permissions | 422 | * sb_permission - Check superblock-level permissions |
| @@ -2383,22 +2384,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path, | |||
| 2383 | } | 2384 | } |
| 2384 | EXPORT_SYMBOL(kern_path_mountpoint); | 2385 | EXPORT_SYMBOL(kern_path_mountpoint); |
| 2385 | 2386 | ||
| 2386 | /* | 2387 | int __check_sticky(struct inode *dir, struct inode *inode) |
| 2387 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
| 2388 | * minimal. | ||
| 2389 | */ | ||
| 2390 | static inline int check_sticky(struct inode *dir, struct inode *inode) | ||
| 2391 | { | 2388 | { |
| 2392 | kuid_t fsuid = current_fsuid(); | 2389 | kuid_t fsuid = current_fsuid(); |
| 2393 | 2390 | ||
| 2394 | if (!(dir->i_mode & S_ISVTX)) | ||
| 2395 | return 0; | ||
| 2396 | if (uid_eq(inode->i_uid, fsuid)) | 2391 | if (uid_eq(inode->i_uid, fsuid)) |
| 2397 | return 0; | 2392 | return 0; |
| 2398 | if (uid_eq(dir->i_uid, fsuid)) | 2393 | if (uid_eq(dir->i_uid, fsuid)) |
| 2399 | return 0; | 2394 | return 0; |
| 2400 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); | 2395 | return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); |
| 2401 | } | 2396 | } |
| 2397 | EXPORT_SYMBOL(__check_sticky); | ||
| 2402 | 2398 | ||
| 2403 | /* | 2399 | /* |
| 2404 | * Check whether we can remove a link victim from directory dir, check | 2400 | * Check whether we can remove a link victim from directory dir, check |
| @@ -2501,7 +2497,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) | |||
| 2501 | } | 2497 | } |
| 2502 | 2498 | ||
| 2503 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); | 2499 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); |
| 2504 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); | 2500 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2); |
| 2505 | return NULL; | 2501 | return NULL; |
| 2506 | } | 2502 | } |
| 2507 | EXPORT_SYMBOL(lock_rename); | 2503 | EXPORT_SYMBOL(lock_rename); |
| @@ -3064,9 +3060,12 @@ finish_open_created: | |||
| 3064 | error = may_open(&nd->path, acc_mode, open_flag); | 3060 | error = may_open(&nd->path, acc_mode, open_flag); |
| 3065 | if (error) | 3061 | if (error) |
| 3066 | goto out; | 3062 | goto out; |
| 3067 | file->f_path.mnt = nd->path.mnt; | 3063 | |
| 3068 | error = finish_open(file, nd->path.dentry, NULL, opened); | 3064 | BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ |
| 3069 | if (error) { | 3065 | error = vfs_open(&nd->path, file, current_cred()); |
| 3066 | if (!error) { | ||
| 3067 | *opened |= FILE_OPENED; | ||
| 3068 | } else { | ||
| 3070 | if (error == -EOPENSTALE) | 3069 | if (error == -EOPENSTALE) |
| 3071 | goto stale_open; | 3070 | goto stale_open; |
| 3072 | goto out; | 3071 | goto out; |
| @@ -3155,7 +3154,8 @@ static int do_tmpfile(int dfd, struct filename *pathname, | |||
| 3155 | if (error) | 3154 | if (error) |
| 3156 | goto out2; | 3155 | goto out2; |
| 3157 | audit_inode(pathname, nd->path.dentry, 0); | 3156 | audit_inode(pathname, nd->path.dentry, 0); |
| 3158 | error = may_open(&nd->path, op->acc_mode, op->open_flag); | 3157 | /* Don't check for other permissions, the inode was just created */ |
| 3158 | error = may_open(&nd->path, MAY_OPEN, op->open_flag); | ||
| 3159 | if (error) | 3159 | if (error) |
| 3160 | goto out2; | 3160 | goto out2; |
| 3161 | file->f_path.mnt = nd->path.mnt; | 3161 | file->f_path.mnt = nd->path.mnt; |
| @@ -4210,12 +4210,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, | |||
| 4210 | bool should_retry = false; | 4210 | bool should_retry = false; |
| 4211 | int error; | 4211 | int error; |
| 4212 | 4212 | ||
| 4213 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 4213 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
| 4214 | return -EINVAL; | 4214 | return -EINVAL; |
| 4215 | 4215 | ||
| 4216 | if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) | 4216 | if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && |
| 4217 | (flags & RENAME_EXCHANGE)) | ||
| 4217 | return -EINVAL; | 4218 | return -EINVAL; |
| 4218 | 4219 | ||
| 4220 | if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) | ||
| 4221 | return -EPERM; | ||
| 4222 | |||
| 4219 | retry: | 4223 | retry: |
| 4220 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); | 4224 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); |
| 4221 | if (IS_ERR(from)) { | 4225 | if (IS_ERR(from)) { |
| @@ -4347,6 +4351,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna | |||
| 4347 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); | 4351 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
| 4348 | } | 4352 | } |
| 4349 | 4353 | ||
| 4354 | int vfs_whiteout(struct inode *dir, struct dentry *dentry) | ||
| 4355 | { | ||
| 4356 | int error = may_create(dir, dentry); | ||
| 4357 | if (error) | ||
| 4358 | return error; | ||
| 4359 | |||
| 4360 | if (!dir->i_op->mknod) | ||
| 4361 | return -EPERM; | ||
| 4362 | |||
| 4363 | return dir->i_op->mknod(dir, dentry, | ||
| 4364 | S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); | ||
| 4365 | } | ||
| 4366 | EXPORT_SYMBOL(vfs_whiteout); | ||
| 4367 | |||
| 4350 | int readlink_copy(char __user *buffer, int buflen, const char *link) | 4368 | int readlink_copy(char __user *buffer, int buflen, const char *link) |
| 4351 | { | 4369 | { |
| 4352 | int len = PTR_ERR(link); | 4370 | int len = PTR_ERR(link); |
diff --git a/fs/namespace.c b/fs/namespace.c index fbba8b17330d..5b66b2b3624d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -1686,6 +1686,33 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
| 1686 | namespace_unlock(); | 1686 | namespace_unlock(); |
| 1687 | } | 1687 | } |
| 1688 | 1688 | ||
| 1689 | /** | ||
| 1690 | * clone_private_mount - create a private clone of a path | ||
| 1691 | * | ||
| 1692 | * This creates a new vfsmount, which will be the clone of @path. The new will | ||
| 1693 | * not be attached anywhere in the namespace and will be private (i.e. changes | ||
| 1694 | * to the originating mount won't be propagated into this). | ||
| 1695 | * | ||
| 1696 | * Release with mntput(). | ||
| 1697 | */ | ||
| 1698 | struct vfsmount *clone_private_mount(struct path *path) | ||
| 1699 | { | ||
| 1700 | struct mount *old_mnt = real_mount(path->mnt); | ||
| 1701 | struct mount *new_mnt; | ||
| 1702 | |||
| 1703 | if (IS_MNT_UNBINDABLE(old_mnt)) | ||
| 1704 | return ERR_PTR(-EINVAL); | ||
| 1705 | |||
| 1706 | down_read(&namespace_sem); | ||
| 1707 | new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); | ||
| 1708 | up_read(&namespace_sem); | ||
| 1709 | if (IS_ERR(new_mnt)) | ||
| 1710 | return ERR_CAST(new_mnt); | ||
| 1711 | |||
| 1712 | return &new_mnt->mnt; | ||
| 1713 | } | ||
| 1714 | EXPORT_SYMBOL_GPL(clone_private_mount); | ||
| 1715 | |||
| 1689 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, | 1716 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, |
| 1690 | struct vfsmount *root) | 1717 | struct vfsmount *root) |
| 1691 | { | 1718 | { |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 5228f201d3d5..4f46f7a05289 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
| @@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) | |||
| 378 | loff_t offset = header->args.offset; | 378 | loff_t offset = header->args.offset; |
| 379 | size_t count = header->args.count; | 379 | size_t count = header->args.count; |
| 380 | struct page **pages = header->args.pages; | 380 | struct page **pages = header->args.pages; |
| 381 | int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; | 381 | int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
| 382 | unsigned int pg_len; | 382 | unsigned int pg_len; |
| 383 | struct blk_plug plug; | 383 | struct blk_plug plug; |
| 384 | int i; | 384 | int i; |
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index e966c023b1b7..acbf9ca4018c 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c | |||
| @@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | |||
| 65 | 65 | ||
| 66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | 66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); |
| 67 | 67 | ||
| 68 | mutex_lock(&nn->bl_mutex); | ||
| 68 | bl_pipe_msg.bl_wq = &nn->bl_wq; | 69 | bl_pipe_msg.bl_wq = &nn->bl_wq; |
| 69 | 70 | ||
| 70 | b->simple.len += 4; /* single volume */ | 71 | b->simple.len += 4; /* single volume */ |
| 71 | if (b->simple.len > PAGE_SIZE) | 72 | if (b->simple.len > PAGE_SIZE) |
| 72 | return -EIO; | 73 | goto out_unlock; |
| 73 | 74 | ||
| 74 | memset(msg, 0, sizeof(*msg)); | 75 | memset(msg, 0, sizeof(*msg)); |
| 75 | msg->len = sizeof(*bl_msg) + b->simple.len; | 76 | msg->len = sizeof(*bl_msg) + b->simple.len; |
| 76 | msg->data = kzalloc(msg->len, gfp_mask); | 77 | msg->data = kzalloc(msg->len, gfp_mask); |
| 77 | if (!msg->data) | 78 | if (!msg->data) |
| 78 | goto out; | 79 | goto out_free_data; |
| 79 | 80 | ||
| 80 | bl_msg = msg->data; | 81 | bl_msg = msg->data; |
| 81 | bl_msg->type = BL_DEVICE_MOUNT, | 82 | bl_msg->type = BL_DEVICE_MOUNT, |
| @@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | |||
| 87 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | 88 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); |
| 88 | if (rc < 0) { | 89 | if (rc < 0) { |
| 89 | remove_wait_queue(&nn->bl_wq, &wq); | 90 | remove_wait_queue(&nn->bl_wq, &wq); |
| 90 | goto out; | 91 | goto out_free_data; |
| 91 | } | 92 | } |
| 92 | 93 | ||
| 93 | set_current_state(TASK_UNINTERRUPTIBLE); | 94 | set_current_state(TASK_UNINTERRUPTIBLE); |
| @@ -97,12 +98,14 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | |||
| 97 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | 98 | if (reply->status != BL_DEVICE_REQUEST_PROC) { |
| 98 | printk(KERN_WARNING "%s failed to decode device: %d\n", | 99 | printk(KERN_WARNING "%s failed to decode device: %d\n", |
| 99 | __func__, reply->status); | 100 | __func__, reply->status); |
| 100 | goto out; | 101 | goto out_free_data; |
| 101 | } | 102 | } |
| 102 | 103 | ||
| 103 | dev = MKDEV(reply->major, reply->minor); | 104 | dev = MKDEV(reply->major, reply->minor); |
| 104 | out: | 105 | out_free_data: |
| 105 | kfree(msg->data); | 106 | kfree(msg->data); |
| 107 | out_unlock: | ||
| 108 | mutex_unlock(&nn->bl_mutex); | ||
| 106 | return dev; | 109 | return dev; |
| 107 | } | 110 | } |
| 108 | 111 | ||
| @@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net) | |||
| 232 | struct nfs_net *nn = net_generic(net, nfs_net_id); | 235 | struct nfs_net *nn = net_generic(net, nfs_net_id); |
| 233 | struct dentry *dentry; | 236 | struct dentry *dentry; |
| 234 | 237 | ||
| 238 | mutex_init(&nn->bl_mutex); | ||
| 235 | init_waitqueue_head(&nn->bl_wq); | 239 | init_waitqueue_head(&nn->bl_wq); |
| 236 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | 240 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); |
| 237 | if (IS_ERR(nn->bl_device_pipe)) | 241 | if (IS_ERR(nn->bl_device_pipe)) |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5853f53db732..7f3f60641344 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
| @@ -125,6 +125,8 @@ again: | |||
| 125 | continue; | 125 | continue; |
| 126 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) | 126 | if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) |
| 127 | continue; | 127 | continue; |
| 128 | if (!nfs4_valid_open_stateid(state)) | ||
| 129 | continue; | ||
| 128 | if (!nfs4_stateid_match(&state->stateid, stateid)) | 130 | if (!nfs4_stateid_match(&state->stateid, stateid)) |
| 129 | continue; | 131 | continue; |
| 130 | get_nfs_open_context(ctx); | 132 | get_nfs_open_context(ctx); |
| @@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * | |||
| 193 | { | 195 | { |
| 194 | int res = 0; | 196 | int res = 0; |
| 195 | 197 | ||
| 196 | res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); | 198 | if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) |
| 199 | res = nfs4_proc_delegreturn(inode, | ||
| 200 | delegation->cred, | ||
| 201 | &delegation->stateid, | ||
| 202 | issync); | ||
| 197 | nfs_free_delegation(delegation); | 203 | nfs_free_delegation(delegation); |
| 198 | return res; | 204 | return res; |
| 199 | } | 205 | } |
| @@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation | |||
| 380 | { | 386 | { |
| 381 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 387 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; |
| 382 | struct nfs_inode *nfsi = NFS_I(inode); | 388 | struct nfs_inode *nfsi = NFS_I(inode); |
| 383 | int err; | 389 | int err = 0; |
| 384 | 390 | ||
| 385 | if (delegation == NULL) | 391 | if (delegation == NULL) |
| 386 | return 0; | 392 | return 0; |
| 387 | do { | 393 | do { |
| 394 | if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) | ||
| 395 | break; | ||
| 388 | err = nfs_delegation_claim_opens(inode, &delegation->stateid); | 396 | err = nfs_delegation_claim_opens(inode, &delegation->stateid); |
| 389 | if (!issync || err != -EAGAIN) | 397 | if (!issync || err != -EAGAIN) |
| 390 | break; | 398 | break; |
| @@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl | |||
| 605 | rcu_read_unlock(); | 613 | rcu_read_unlock(); |
| 606 | } | 614 | } |
| 607 | 615 | ||
| 616 | static void nfs_revoke_delegation(struct inode *inode) | ||
| 617 | { | ||
| 618 | struct nfs_delegation *delegation; | ||
| 619 | rcu_read_lock(); | ||
| 620 | delegation = rcu_dereference(NFS_I(inode)->delegation); | ||
| 621 | if (delegation != NULL) { | ||
| 622 | set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); | ||
| 623 | nfs_mark_return_delegation(NFS_SERVER(inode), delegation); | ||
| 624 | } | ||
| 625 | rcu_read_unlock(); | ||
| 626 | } | ||
| 627 | |||
| 608 | void nfs_remove_bad_delegation(struct inode *inode) | 628 | void nfs_remove_bad_delegation(struct inode *inode) |
| 609 | { | 629 | { |
| 610 | struct nfs_delegation *delegation; | 630 | struct nfs_delegation *delegation; |
| 611 | 631 | ||
| 632 | nfs_revoke_delegation(inode); | ||
| 612 | delegation = nfs_inode_detach_delegation(inode); | 633 | delegation = nfs_inode_detach_delegation(inode); |
| 613 | if (delegation) { | 634 | if (delegation) { |
| 614 | nfs_inode_find_state_and_recover(inode, &delegation->stateid); | 635 | nfs_inode_find_state_and_recover(inode, &delegation->stateid); |
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5c1cce39297f..e3c20a3ccc93 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h | |||
| @@ -31,6 +31,7 @@ enum { | |||
| 31 | NFS_DELEGATION_RETURN_IF_CLOSED, | 31 | NFS_DELEGATION_RETURN_IF_CLOSED, |
| 32 | NFS_DELEGATION_REFERENCED, | 32 | NFS_DELEGATION_REFERENCED, |
| 33 | NFS_DELEGATION_RETURNING, | 33 | NFS_DELEGATION_RETURNING, |
| 34 | NFS_DELEGATION_REVOKED, | ||
| 34 | }; | 35 | }; |
| 35 | 36 | ||
| 36 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); | 37 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 06e8cfcbb670..6e62155abf26 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |||
| 1527 | case -ENOENT: | 1527 | case -ENOENT: |
| 1528 | d_drop(dentry); | 1528 | d_drop(dentry); |
| 1529 | d_add(dentry, NULL); | 1529 | d_add(dentry, NULL); |
| 1530 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
| 1530 | break; | 1531 | break; |
| 1531 | case -EISDIR: | 1532 | case -EISDIR: |
| 1532 | case -ENOTDIR: | 1533 | case -ENOTDIR: |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 20cffc830468..10bf07280f4a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
| @@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref) | |||
| 266 | { | 266 | { |
| 267 | struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); | 267 | struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); |
| 268 | 268 | ||
| 269 | nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); | ||
| 269 | if (dreq->l_ctx != NULL) | 270 | if (dreq->l_ctx != NULL) |
| 270 | nfs_put_lock_context(dreq->l_ctx); | 271 | nfs_put_lock_context(dreq->l_ctx); |
| 271 | if (dreq->ctx != NULL) | 272 | if (dreq->ctx != NULL) |
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 46fab1cb455a..7afb52f6a25a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
| @@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, | |||
| 145 | case -NFS4ERR_DELEG_REVOKED: | 145 | case -NFS4ERR_DELEG_REVOKED: |
| 146 | case -NFS4ERR_ADMIN_REVOKED: | 146 | case -NFS4ERR_ADMIN_REVOKED: |
| 147 | case -NFS4ERR_BAD_STATEID: | 147 | case -NFS4ERR_BAD_STATEID: |
| 148 | if (state == NULL) | ||
| 149 | break; | ||
| 150 | nfs_remove_bad_delegation(state->inode); | ||
| 151 | case -NFS4ERR_OPENMODE: | 148 | case -NFS4ERR_OPENMODE: |
| 152 | if (state == NULL) | 149 | if (state == NULL) |
| 153 | break; | 150 | break; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6388a59f2add..00689a8a85e4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -626,7 +626,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
| 626 | { | 626 | { |
| 627 | struct inode *inode = dentry->d_inode; | 627 | struct inode *inode = dentry->d_inode; |
| 628 | int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; | 628 | int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; |
| 629 | int err; | 629 | int err = 0; |
| 630 | 630 | ||
| 631 | trace_nfs_getattr_enter(inode); | 631 | trace_nfs_getattr_enter(inode); |
| 632 | /* Flush out writes to the server in order to update c/mtime. */ | 632 | /* Flush out writes to the server in order to update c/mtime. */ |
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index ef221fb8a183..f0e06e4acbef 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h | |||
| @@ -19,6 +19,7 @@ struct nfs_net { | |||
| 19 | struct rpc_pipe *bl_device_pipe; | 19 | struct rpc_pipe *bl_device_pipe; |
| 20 | struct bl_dev_msg bl_mount_reply; | 20 | struct bl_dev_msg bl_mount_reply; |
| 21 | wait_queue_head_t bl_wq; | 21 | wait_queue_head_t bl_wq; |
| 22 | struct mutex bl_mutex; | ||
| 22 | struct list_head nfs_client_list; | 23 | struct list_head nfs_client_list; |
| 23 | struct list_head nfs_volume_list; | 24 | struct list_head nfs_volume_list; |
| 24 | #if IS_ENABLED(CONFIG_NFS_V4) | 25 | #if IS_ENABLED(CONFIG_NFS_V4) |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 405bd95c1f58..69dc20a743f9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -370,11 +370,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc | |||
| 370 | case -NFS4ERR_DELEG_REVOKED: | 370 | case -NFS4ERR_DELEG_REVOKED: |
| 371 | case -NFS4ERR_ADMIN_REVOKED: | 371 | case -NFS4ERR_ADMIN_REVOKED: |
| 372 | case -NFS4ERR_BAD_STATEID: | 372 | case -NFS4ERR_BAD_STATEID: |
| 373 | if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { | ||
| 374 | nfs_remove_bad_delegation(inode); | ||
| 375 | exception->retry = 1; | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | if (state == NULL) | 373 | if (state == NULL) |
| 379 | break; | 374 | break; |
| 380 | ret = nfs4_schedule_stateid_recovery(server, state); | 375 | ret = nfs4_schedule_stateid_recovery(server, state); |
| @@ -1654,7 +1649,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct | |||
| 1654 | nfs_inode_find_state_and_recover(state->inode, | 1649 | nfs_inode_find_state_and_recover(state->inode, |
| 1655 | stateid); | 1650 | stateid); |
| 1656 | nfs4_schedule_stateid_recovery(server, state); | 1651 | nfs4_schedule_stateid_recovery(server, state); |
| 1657 | return 0; | 1652 | return -EAGAIN; |
| 1658 | case -NFS4ERR_DELAY: | 1653 | case -NFS4ERR_DELAY: |
| 1659 | case -NFS4ERR_GRACE: | 1654 | case -NFS4ERR_GRACE: |
| 1660 | set_bit(NFS_DELEGATED_STATE, &state->flags); | 1655 | set_bit(NFS_DELEGATED_STATE, &state->flags); |
| @@ -2109,46 +2104,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta | |||
| 2109 | return ret; | 2104 | return ret; |
| 2110 | } | 2105 | } |
| 2111 | 2106 | ||
| 2107 | static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) | ||
| 2108 | { | ||
| 2109 | nfs_remove_bad_delegation(state->inode); | ||
| 2110 | write_seqlock(&state->seqlock); | ||
| 2111 | nfs4_stateid_copy(&state->stateid, &state->open_stateid); | ||
| 2112 | write_sequnlock(&state->seqlock); | ||
| 2113 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | ||
| 2114 | } | ||
| 2115 | |||
| 2116 | static void nfs40_clear_delegation_stateid(struct nfs4_state *state) | ||
| 2117 | { | ||
| 2118 | if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) | ||
| 2119 | nfs_finish_clear_delegation_stateid(state); | ||
| 2120 | } | ||
| 2121 | |||
| 2122 | static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) | ||
| 2123 | { | ||
| 2124 | /* NFSv4.0 doesn't allow for delegation recovery on open expire */ | ||
| 2125 | nfs40_clear_delegation_stateid(state); | ||
| 2126 | return nfs4_open_expired(sp, state); | ||
| 2127 | } | ||
| 2128 | |||
| 2112 | #if defined(CONFIG_NFS_V4_1) | 2129 | #if defined(CONFIG_NFS_V4_1) |
| 2113 | static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | 2130 | static void nfs41_check_delegation_stateid(struct nfs4_state *state) |
| 2114 | { | 2131 | { |
| 2115 | struct nfs_server *server = NFS_SERVER(state->inode); | 2132 | struct nfs_server *server = NFS_SERVER(state->inode); |
| 2116 | nfs4_stateid *stateid = &state->stateid; | 2133 | nfs4_stateid stateid; |
| 2117 | struct nfs_delegation *delegation; | 2134 | struct nfs_delegation *delegation; |
| 2118 | struct rpc_cred *cred = NULL; | 2135 | struct rpc_cred *cred; |
| 2119 | int status = -NFS4ERR_BAD_STATEID; | 2136 | int status; |
| 2120 | |||
| 2121 | /* If a state reset has been done, test_stateid is unneeded */ | ||
| 2122 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | ||
| 2123 | return; | ||
| 2124 | 2137 | ||
| 2125 | /* Get the delegation credential for use by test/free_stateid */ | 2138 | /* Get the delegation credential for use by test/free_stateid */ |
| 2126 | rcu_read_lock(); | 2139 | rcu_read_lock(); |
| 2127 | delegation = rcu_dereference(NFS_I(state->inode)->delegation); | 2140 | delegation = rcu_dereference(NFS_I(state->inode)->delegation); |
| 2128 | if (delegation != NULL && | 2141 | if (delegation == NULL) { |
| 2129 | nfs4_stateid_match(&delegation->stateid, stateid)) { | ||
| 2130 | cred = get_rpccred(delegation->cred); | ||
| 2131 | rcu_read_unlock(); | ||
| 2132 | status = nfs41_test_stateid(server, stateid, cred); | ||
| 2133 | trace_nfs4_test_delegation_stateid(state, NULL, status); | ||
| 2134 | } else | ||
| 2135 | rcu_read_unlock(); | 2142 | rcu_read_unlock(); |
| 2143 | return; | ||
| 2144 | } | ||
| 2145 | |||
| 2146 | nfs4_stateid_copy(&stateid, &delegation->stateid); | ||
| 2147 | cred = get_rpccred(delegation->cred); | ||
| 2148 | rcu_read_unlock(); | ||
| 2149 | status = nfs41_test_stateid(server, &stateid, cred); | ||
| 2150 | trace_nfs4_test_delegation_stateid(state, NULL, status); | ||
| 2136 | 2151 | ||
| 2137 | if (status != NFS_OK) { | 2152 | if (status != NFS_OK) { |
| 2138 | /* Free the stateid unless the server explicitly | 2153 | /* Free the stateid unless the server explicitly |
| 2139 | * informs us the stateid is unrecognized. */ | 2154 | * informs us the stateid is unrecognized. */ |
| 2140 | if (status != -NFS4ERR_BAD_STATEID) | 2155 | if (status != -NFS4ERR_BAD_STATEID) |
| 2141 | nfs41_free_stateid(server, stateid, cred); | 2156 | nfs41_free_stateid(server, &stateid, cred); |
| 2142 | nfs_remove_bad_delegation(state->inode); | 2157 | nfs_finish_clear_delegation_stateid(state); |
| 2143 | |||
| 2144 | write_seqlock(&state->seqlock); | ||
| 2145 | nfs4_stateid_copy(&state->stateid, &state->open_stateid); | ||
| 2146 | write_sequnlock(&state->seqlock); | ||
| 2147 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | ||
| 2148 | } | 2158 | } |
| 2149 | 2159 | ||
| 2150 | if (cred != NULL) | 2160 | put_rpccred(cred); |
| 2151 | put_rpccred(cred); | ||
| 2152 | } | 2161 | } |
| 2153 | 2162 | ||
| 2154 | /** | 2163 | /** |
| @@ -2192,7 +2201,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st | |||
| 2192 | { | 2201 | { |
| 2193 | int status; | 2202 | int status; |
| 2194 | 2203 | ||
| 2195 | nfs41_clear_delegation_stateid(state); | 2204 | nfs41_check_delegation_stateid(state); |
| 2196 | status = nfs41_check_open_stateid(state); | 2205 | status = nfs41_check_open_stateid(state); |
| 2197 | if (status != NFS_OK) | 2206 | if (status != NFS_OK) |
| 2198 | status = nfs4_open_expired(sp, state); | 2207 | status = nfs4_open_expired(sp, state); |
| @@ -2231,19 +2240,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |||
| 2231 | seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | 2240 | seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); |
| 2232 | 2241 | ||
| 2233 | ret = _nfs4_proc_open(opendata); | 2242 | ret = _nfs4_proc_open(opendata); |
| 2234 | if (ret != 0) { | 2243 | if (ret != 0) |
| 2235 | if (ret == -ENOENT) { | ||
| 2236 | dentry = opendata->dentry; | ||
| 2237 | if (dentry->d_inode) | ||
| 2238 | d_delete(dentry); | ||
| 2239 | else if (d_unhashed(dentry)) | ||
| 2240 | d_add(dentry, NULL); | ||
| 2241 | |||
| 2242 | nfs_set_verifier(dentry, | ||
| 2243 | nfs_save_change_attribute(opendata->dir->d_inode)); | ||
| 2244 | } | ||
| 2245 | goto out; | 2244 | goto out; |
| 2246 | } | ||
| 2247 | 2245 | ||
| 2248 | state = nfs4_opendata_to_nfs4_state(opendata); | 2246 | state = nfs4_opendata_to_nfs4_state(opendata); |
| 2249 | ret = PTR_ERR(state); | 2247 | ret = PTR_ERR(state); |
| @@ -4841,9 +4839,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
| 4841 | case -NFS4ERR_DELEG_REVOKED: | 4839 | case -NFS4ERR_DELEG_REVOKED: |
| 4842 | case -NFS4ERR_ADMIN_REVOKED: | 4840 | case -NFS4ERR_ADMIN_REVOKED: |
| 4843 | case -NFS4ERR_BAD_STATEID: | 4841 | case -NFS4ERR_BAD_STATEID: |
| 4844 | if (state == NULL) | ||
| 4845 | break; | ||
| 4846 | nfs_remove_bad_delegation(state->inode); | ||
| 4847 | case -NFS4ERR_OPENMODE: | 4842 | case -NFS4ERR_OPENMODE: |
| 4848 | if (state == NULL) | 4843 | if (state == NULL) |
| 4849 | break; | 4844 | break; |
| @@ -8341,7 +8336,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { | |||
| 8341 | static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { | 8336 | static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { |
| 8342 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, | 8337 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, |
| 8343 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, | 8338 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, |
| 8344 | .recover_open = nfs4_open_expired, | 8339 | .recover_open = nfs40_open_expired, |
| 8345 | .recover_lock = nfs4_lock_expired, | 8340 | .recover_lock = nfs4_lock_expired, |
| 8346 | .establish_clid = nfs4_init_clientid, | 8341 | .establish_clid = nfs4_init_clientid, |
| 8347 | }; | 8342 | }; |
| @@ -8408,8 +8403,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | |||
| 8408 | | NFS_CAP_CHANGE_ATTR | 8403 | | NFS_CAP_CHANGE_ATTR |
| 8409 | | NFS_CAP_POSIX_LOCK | 8404 | | NFS_CAP_POSIX_LOCK |
| 8410 | | NFS_CAP_STATEID_NFSV41 | 8405 | | NFS_CAP_STATEID_NFSV41 |
| 8411 | | NFS_CAP_ATOMIC_OPEN_V1 | 8406 | | NFS_CAP_ATOMIC_OPEN_V1, |
| 8412 | | NFS_CAP_SEEK, | ||
| 8413 | .init_client = nfs41_init_client, | 8407 | .init_client = nfs41_init_client, |
| 8414 | .shutdown_client = nfs41_shutdown_client, | 8408 | .shutdown_client = nfs41_shutdown_client, |
| 8415 | .match_stateid = nfs41_match_stateid, | 8409 | .match_stateid = nfs41_match_stateid, |
| @@ -8431,7 +8425,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | |||
| 8431 | | NFS_CAP_CHANGE_ATTR | 8425 | | NFS_CAP_CHANGE_ATTR |
| 8432 | | NFS_CAP_POSIX_LOCK | 8426 | | NFS_CAP_POSIX_LOCK |
| 8433 | | NFS_CAP_STATEID_NFSV41 | 8427 | | NFS_CAP_STATEID_NFSV41 |
| 8434 | | NFS_CAP_ATOMIC_OPEN_V1, | 8428 | | NFS_CAP_ATOMIC_OPEN_V1 |
| 8429 | | NFS_CAP_SEEK, | ||
| 8435 | .init_client = nfs41_init_client, | 8430 | .init_client = nfs41_init_client, |
| 8436 | .shutdown_client = nfs41_shutdown_client, | 8431 | .shutdown_client = nfs41_shutdown_client, |
| 8437 | .match_stateid = nfs41_match_stateid, | 8432 | .match_stateid = nfs41_match_stateid, |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c6e4bda63000..9e5bc42180e4 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * All rights reserved. | 5 | * All rights reserved. |
| 6 | * | 6 | * |
| 7 | * Benny Halevy <bhalevy@panasas.com> | 7 | * Benny Halevy <bhalevy@panasas.com> |
| 8 | * Boaz Harrosh <bharrosh@panasas.com> | 8 | * Boaz Harrosh <ooo@electrozaur.com> |
| 9 | * | 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
| 11 | * it under the terms of the GNU General Public License version 2 | 11 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index c89357c7a914..919efd4a1a23 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * All rights reserved. | 5 | * All rights reserved. |
| 6 | * | 6 | * |
| 7 | * Benny Halevy <bhalevy@panasas.com> | 7 | * Benny Halevy <bhalevy@panasas.com> |
| 8 | * Boaz Harrosh <bharrosh@panasas.com> | 8 | * Boaz Harrosh <ooo@electrozaur.com> |
| 9 | * | 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
| 11 | * it under the terms of the GNU General Public License version 2 | 11 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 3a0828d57339..2641dbad345c 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | * All rights reserved. | 6 | * All rights reserved. |
| 7 | * | 7 | * |
| 8 | * Benny Halevy <bhalevy@panasas.com> | 8 | * Benny Halevy <bhalevy@panasas.com> |
| 9 | * Boaz Harrosh <bharrosh@panasas.com> | 9 | * Boaz Harrosh <ooo@electrozaur.com> |
| 10 | * | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or modify | 11 | * This program is free software; you can redistribute it and/or modify |
| 12 | * it under the terms of the GNU General Public License version 2 | 12 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index b3918f7ac34d..f093c7ec983b 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * All rights reserved. | 5 | * All rights reserved. |
| 6 | * | 6 | * |
| 7 | * Benny Halevy <bhalevy@panasas.com> | 7 | * Benny Halevy <bhalevy@panasas.com> |
| 8 | * Boaz Harrosh <bharrosh@panasas.com> | 8 | * Boaz Harrosh <ooo@electrozaur.com> |
| 9 | * | 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
| 11 | * it under the terms of the GNU General Public License version 2 | 11 | * it under the terms of the GNU General Public License version 2 |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 12493846a2d3..f83b02dc9166 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -715,8 +715,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
| 715 | 715 | ||
| 716 | if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) | 716 | if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) |
| 717 | nfs_release_request(req); | 717 | nfs_release_request(req); |
| 718 | else | ||
| 719 | WARN_ON_ONCE(1); | ||
| 720 | } | 718 | } |
| 721 | 719 | ||
| 722 | static void | 720 | static void |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ed2b1151b171..7cbdf1b2e4ab 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
| @@ -774,8 +774,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) | |||
| 774 | { | 774 | { |
| 775 | if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { | 775 | if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { |
| 776 | rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); | 776 | rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); |
| 777 | dprintk("%s slot is busy\n", __func__); | 777 | /* Race breaker */ |
| 778 | return false; | 778 | if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { |
| 779 | dprintk("%s slot is busy\n", __func__); | ||
| 780 | return false; | ||
| 781 | } | ||
| 782 | rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); | ||
| 779 | } | 783 | } |
| 780 | return true; | 784 | return true; |
| 781 | } | 785 | } |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index cdeb3cfd6f32..0beb023f25ac 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -1272,7 +1272,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) | |||
| 1272 | */ | 1272 | */ |
| 1273 | if (argp->opcnt == resp->opcnt) | 1273 | if (argp->opcnt == resp->opcnt) |
| 1274 | return false; | 1274 | return false; |
| 1275 | 1275 | if (next->opnum == OP_ILLEGAL) | |
| 1276 | return false; | ||
| 1276 | nextd = OPDESC(next); | 1277 | nextd = OPDESC(next); |
| 1277 | /* | 1278 | /* |
| 1278 | * Rest of 2.6.3.1.1: certain operations will return WRONGSEC | 1279 | * Rest of 2.6.3.1.1: certain operations will return WRONGSEC |
| @@ -1589,7 +1590,8 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op | |||
| 1589 | static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, | 1590 | static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, |
| 1590 | struct nfsd4_op *op) | 1591 | struct nfsd4_op *op) |
| 1591 | { | 1592 | { |
| 1592 | return NFS4_MAX_SESSIONID_LEN + 20; | 1593 | return (op_encode_hdr_size |
| 1594 | + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); | ||
| 1593 | } | 1595 | } |
| 1594 | 1596 | ||
| 1595 | static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | 1597 | static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) |
| @@ -1893,6 +1895,7 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
| 1893 | .op_func = (nfsd4op_func)nfsd4_sequence, | 1895 | .op_func = (nfsd4op_func)nfsd4_sequence, |
| 1894 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1896 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, |
| 1895 | .op_name = "OP_SEQUENCE", | 1897 | .op_name = "OP_SEQUENCE", |
| 1898 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize, | ||
| 1896 | }, | 1899 | }, |
| 1897 | [OP_DESTROY_CLIENTID] = { | 1900 | [OP_DESTROY_CLIENTID] = { |
| 1898 | .op_func = (nfsd4op_func)nfsd4_destroy_clientid, | 1901 | .op_func = (nfsd4op_func)nfsd4_destroy_clientid, |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 747f3b95bd11..33a46a8dfaf7 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
| @@ -335,12 +335,15 @@ void nfsd_lockd_shutdown(void); | |||
| 335 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | 335 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) |
| 336 | 336 | ||
| 337 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | 337 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL |
| 338 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ | 338 | #define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL |
| 339 | (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) | ||
| 340 | #else | 339 | #else |
| 341 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 | 340 | #define NFSD4_2_SECURITY_ATTRS 0 |
| 342 | #endif | 341 | #endif |
| 343 | 342 | ||
| 343 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ | ||
| 344 | (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ | ||
| 345 | NFSD4_2_SECURITY_ATTRS) | ||
| 346 | |||
| 344 | static inline u32 nfsd_suppattrs0(u32 minorversion) | 347 | static inline u32 nfsd_suppattrs0(u32 minorversion) |
| 345 | { | 348 | { |
| 346 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 | 349 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9d3e9c50066a..89326acd4561 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
| @@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
| 229 | &fsnotify_mark_srcu); | 229 | &fsnotify_mark_srcu); |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | /* | ||
| 233 | * We need to merge inode & vfsmount mark lists so that inode mark | ||
| 234 | * ignore masks are properly reflected for mount mark notifications. | ||
| 235 | * That's why this traversal is so complicated... | ||
| 236 | */ | ||
| 232 | while (inode_node || vfsmount_node) { | 237 | while (inode_node || vfsmount_node) { |
| 233 | inode_group = vfsmount_group = NULL; | 238 | inode_group = NULL; |
| 239 | inode_mark = NULL; | ||
| 240 | vfsmount_group = NULL; | ||
| 241 | vfsmount_mark = NULL; | ||
| 234 | 242 | ||
| 235 | if (inode_node) { | 243 | if (inode_node) { |
| 236 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), | 244 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), |
| @@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
| 244 | vfsmount_group = vfsmount_mark->group; | 252 | vfsmount_group = vfsmount_mark->group; |
| 245 | } | 253 | } |
| 246 | 254 | ||
| 247 | if (inode_group > vfsmount_group) { | 255 | if (inode_group && vfsmount_group) { |
| 248 | /* handle inode */ | 256 | int cmp = fsnotify_compare_groups(inode_group, |
| 249 | ret = send_to_group(to_tell, inode_mark, NULL, mask, | 257 | vfsmount_group); |
| 250 | data, data_is, cookie, file_name); | 258 | if (cmp > 0) { |
| 251 | /* we didn't use the vfsmount_mark */ | 259 | inode_group = NULL; |
| 252 | vfsmount_group = NULL; | 260 | inode_mark = NULL; |
| 253 | } else if (vfsmount_group > inode_group) { | 261 | } else if (cmp < 0) { |
| 254 | ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, | 262 | vfsmount_group = NULL; |
| 255 | data, data_is, cookie, file_name); | 263 | vfsmount_mark = NULL; |
| 256 | inode_group = NULL; | 264 | } |
| 257 | } else { | ||
| 258 | ret = send_to_group(to_tell, inode_mark, vfsmount_mark, | ||
| 259 | mask, data, data_is, cookie, | ||
| 260 | file_name); | ||
| 261 | } | 265 | } |
| 266 | ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, | ||
| 267 | data, data_is, cookie, file_name); | ||
| 262 | 268 | ||
| 263 | if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) | 269 | if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) |
| 264 | goto out; | 270 | goto out; |
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 9c0898c4cfe1..3b68b0ae0a97 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h | |||
| @@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group); | |||
| 12 | /* protects reads of inode and vfsmount marks list */ | 12 | /* protects reads of inode and vfsmount marks list */ |
| 13 | extern struct srcu_struct fsnotify_mark_srcu; | 13 | extern struct srcu_struct fsnotify_mark_srcu; |
| 14 | 14 | ||
| 15 | /* compare two groups for sorting of marks lists */ | ||
| 16 | extern int fsnotify_compare_groups(struct fsnotify_group *a, | ||
| 17 | struct fsnotify_group *b); | ||
| 18 | |||
| 15 | extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, | 19 | extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, |
| 16 | __u32 mask); | 20 | __u32 mask); |
| 17 | /* add a mark to an inode */ | 21 | /* add a mark to an inode */ |
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 9ce062218de9..dfbf5447eea4 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
| @@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, | |||
| 194 | { | 194 | { |
| 195 | struct fsnotify_mark *lmark, *last = NULL; | 195 | struct fsnotify_mark *lmark, *last = NULL; |
| 196 | int ret = 0; | 196 | int ret = 0; |
| 197 | int cmp; | ||
| 197 | 198 | ||
| 198 | mark->flags |= FSNOTIFY_MARK_FLAG_INODE; | 199 | mark->flags |= FSNOTIFY_MARK_FLAG_INODE; |
| 199 | 200 | ||
| @@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, | |||
| 219 | goto out; | 220 | goto out; |
| 220 | } | 221 | } |
| 221 | 222 | ||
| 222 | if (mark->group->priority < lmark->group->priority) | 223 | cmp = fsnotify_compare_groups(lmark->group, mark->group); |
| 223 | continue; | 224 | if (cmp < 0) |
| 224 | |||
| 225 | if ((mark->group->priority == lmark->group->priority) && | ||
| 226 | (mark->group < lmark->group)) | ||
| 227 | continue; | 225 | continue; |
| 228 | 226 | ||
| 229 | hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); | 227 | hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); |
| @@ -288,20 +286,25 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
| 288 | spin_unlock(&inode->i_lock); | 286 | spin_unlock(&inode->i_lock); |
| 289 | 287 | ||
| 290 | /* In case the dropping of a reference would nuke next_i. */ | 288 | /* In case the dropping of a reference would nuke next_i. */ |
| 291 | if ((&next_i->i_sb_list != list) && | 289 | while (&next_i->i_sb_list != list) { |
| 292 | atomic_read(&next_i->i_count)) { | ||
| 293 | spin_lock(&next_i->i_lock); | 290 | spin_lock(&next_i->i_lock); |
| 294 | if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { | 291 | if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && |
| 292 | atomic_read(&next_i->i_count)) { | ||
| 295 | __iget(next_i); | 293 | __iget(next_i); |
| 296 | need_iput = next_i; | 294 | need_iput = next_i; |
| 295 | spin_unlock(&next_i->i_lock); | ||
| 296 | break; | ||
| 297 | } | 297 | } |
| 298 | spin_unlock(&next_i->i_lock); | 298 | spin_unlock(&next_i->i_lock); |
| 299 | next_i = list_entry(next_i->i_sb_list.next, | ||
| 300 | struct inode, i_sb_list); | ||
| 299 | } | 301 | } |
| 300 | 302 | ||
| 301 | /* | 303 | /* |
| 302 | * We can safely drop inode_sb_list_lock here because we hold | 304 | * We can safely drop inode_sb_list_lock here because either |
| 303 | * references on both inode and next_i. Also no new inodes | 305 | * we actually hold references on both inode and next_i or |
| 304 | * will be added since the umount has begun. | 306 | * end of list. Also no new inodes will be added since the |
| 307 | * umount has begun. | ||
| 305 | */ | 308 | */ |
| 306 | spin_unlock(&inode_sb_list_lock); | 309 | spin_unlock(&inode_sb_list_lock); |
| 307 | 310 | ||
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d90deaa08e78..34c38fabf514 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
| @@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas | |||
| 210 | } | 210 | } |
| 211 | 211 | ||
| 212 | /* | 212 | /* |
| 213 | * Sorting function for lists of fsnotify marks. | ||
| 214 | * | ||
| 215 | * Fanotify supports different notification classes (reflected as priority of | ||
| 216 | * notification group). Events shall be passed to notification groups in | ||
| 217 | * decreasing priority order. To achieve this marks in notification lists for | ||
| 218 | * inodes and vfsmounts are sorted so that priorities of corresponding groups | ||
| 219 | * are descending. | ||
| 220 | * | ||
| 221 | * Furthermore correct handling of the ignore mask requires processing inode | ||
| 222 | * and vfsmount marks of each group together. Using the group address as | ||
| 223 | * further sort criterion provides a unique sorting order and thus we can | ||
| 224 | * merge inode and vfsmount lists of marks in linear time and find groups | ||
| 225 | * present in both lists. | ||
| 226 | * | ||
| 227 | * A return value of 1 signifies that b has priority over a. | ||
| 228 | * A return value of 0 signifies that the two marks have to be handled together. | ||
| 229 | * A return value of -1 signifies that a has priority over b. | ||
| 230 | */ | ||
| 231 | int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) | ||
| 232 | { | ||
| 233 | if (a == b) | ||
| 234 | return 0; | ||
| 235 | if (!a) | ||
| 236 | return 1; | ||
| 237 | if (!b) | ||
| 238 | return -1; | ||
| 239 | if (a->priority < b->priority) | ||
| 240 | return 1; | ||
| 241 | if (a->priority > b->priority) | ||
| 242 | return -1; | ||
| 243 | if (a < b) | ||
| 244 | return 1; | ||
| 245 | return -1; | ||
| 246 | } | ||
| 247 | |||
| 248 | /* | ||
| 213 | * Attach an initialized mark to a given group and fs object. | 249 | * Attach an initialized mark to a given group and fs object. |
| 214 | * These marks may be used for the fsnotify backend to determine which | 250 | * These marks may be used for the fsnotify backend to determine which |
| 215 | * event types should be delivered to which group. | 251 | * event types should be delivered to which group. |
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index ac851e8376b1..faefa72a11eb 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c | |||
| @@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | |||
| 153 | struct mount *m = real_mount(mnt); | 153 | struct mount *m = real_mount(mnt); |
| 154 | struct fsnotify_mark *lmark, *last = NULL; | 154 | struct fsnotify_mark *lmark, *last = NULL; |
| 155 | int ret = 0; | 155 | int ret = 0; |
| 156 | int cmp; | ||
| 156 | 157 | ||
| 157 | mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; | 158 | mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; |
| 158 | 159 | ||
| @@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | |||
| 178 | goto out; | 179 | goto out; |
| 179 | } | 180 | } |
| 180 | 181 | ||
| 181 | if (mark->group->priority < lmark->group->priority) | 182 | cmp = fsnotify_compare_groups(lmark->group, mark->group); |
| 182 | continue; | 183 | if (cmp < 0) |
| 183 | |||
| 184 | if ((mark->group->priority == lmark->group->priority) && | ||
| 185 | (mark->group < lmark->group)) | ||
| 186 | continue; | 184 | continue; |
| 187 | 185 | ||
| 188 | hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); | 186 | hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 97de0fbd9f78..a96044004064 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, | |||
| 925 | size_t veclen, size_t total) | 925 | size_t veclen, size_t total) |
| 926 | { | 926 | { |
| 927 | int ret; | 927 | int ret; |
| 928 | struct msghdr msg; | 928 | struct msghdr msg = {.msg_flags = 0,}; |
| 929 | 929 | ||
| 930 | if (sock == NULL) { | 930 | if (sock == NULL) { |
| 931 | ret = -EINVAL; | 931 | ret = -EINVAL; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8add6f1030d7..b931e04e3388 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -158,7 +158,7 @@ bail_add: | |||
| 158 | * NOTE: This dentry already has ->d_op set from | 158 | * NOTE: This dentry already has ->d_op set from |
| 159 | * ocfs2_get_parent() and ocfs2_get_dentry() | 159 | * ocfs2_get_parent() and ocfs2_get_dentry() |
| 160 | */ | 160 | */ |
| 161 | if (ret) | 161 | if (!IS_ERR_OR_NULL(ret)) |
| 162 | dentry = ret; | 162 | dentry = ret; |
| 163 | 163 | ||
| 164 | status = ocfs2_dentry_attach_lock(dentry, inode, | 164 | status = ocfs2_dentry_attach_lock(dentry, inode, |
| @@ -823,8 +823,7 @@ struct file *dentry_open(const struct path *path, int flags, | |||
| 823 | f = get_empty_filp(); | 823 | f = get_empty_filp(); |
| 824 | if (!IS_ERR(f)) { | 824 | if (!IS_ERR(f)) { |
| 825 | f->f_flags = flags; | 825 | f->f_flags = flags; |
| 826 | f->f_path = *path; | 826 | error = vfs_open(path, f, cred); |
| 827 | error = do_dentry_open(f, NULL, cred); | ||
| 828 | if (!error) { | 827 | if (!error) { |
| 829 | /* from now on we need fput() to dispose of f */ | 828 | /* from now on we need fput() to dispose of f */ |
| 830 | error = open_check_o_direct(f); | 829 | error = open_check_o_direct(f); |
| @@ -841,6 +840,26 @@ struct file *dentry_open(const struct path *path, int flags, | |||
| 841 | } | 840 | } |
| 842 | EXPORT_SYMBOL(dentry_open); | 841 | EXPORT_SYMBOL(dentry_open); |
| 843 | 842 | ||
| 843 | /** | ||
| 844 | * vfs_open - open the file at the given path | ||
| 845 | * @path: path to open | ||
| 846 | * @filp: newly allocated file with f_flag initialized | ||
| 847 | * @cred: credentials to use | ||
| 848 | */ | ||
| 849 | int vfs_open(const struct path *path, struct file *filp, | ||
| 850 | const struct cred *cred) | ||
| 851 | { | ||
| 852 | struct inode *inode = path->dentry->d_inode; | ||
| 853 | |||
| 854 | if (inode->i_op->dentry_open) | ||
| 855 | return inode->i_op->dentry_open(path->dentry, filp, cred); | ||
| 856 | else { | ||
| 857 | filp->f_path = *path; | ||
| 858 | return do_dentry_open(filp, NULL, cred); | ||
| 859 | } | ||
| 860 | } | ||
| 861 | EXPORT_SYMBOL(vfs_open); | ||
| 862 | |||
| 844 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) | 863 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
| 845 | { | 864 | { |
| 846 | int lookup_flags = 0; | 865 | int lookup_flags = 0; |
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig new file mode 100644 index 000000000000..34355818a2e0 --- /dev/null +++ b/fs/overlayfs/Kconfig | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | config OVERLAY_FS | ||
| 2 | tristate "Overlay filesystem support" | ||
| 3 | help | ||
| 4 | An overlay filesystem combines two filesystems - an 'upper' filesystem | ||
| 5 | and a 'lower' filesystem. When a name exists in both filesystems, the | ||
| 6 | object in the 'upper' filesystem is visible while the object in the | ||
| 7 | 'lower' filesystem is either hidden or, in the case of directories, | ||
| 8 | merged with the 'upper' object. | ||
| 9 | |||
| 10 | For more information see Documentation/filesystems/overlayfs.txt | ||
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile new file mode 100644 index 000000000000..900daed3e91d --- /dev/null +++ b/fs/overlayfs/Makefile | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | # | ||
| 2 | # Makefile for the overlay filesystem. | ||
| 3 | # | ||
| 4 | |||
| 5 | obj-$(CONFIG_OVERLAY_FS) += overlay.o | ||
| 6 | |||
| 7 | overlay-objs := super.o inode.o dir.o readdir.o copy_up.o | ||
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c new file mode 100644 index 000000000000..ea10a8719107 --- /dev/null +++ b/fs/overlayfs/copy_up.c | |||
| @@ -0,0 +1,414 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/file.h> | ||
| 13 | #include <linux/splice.h> | ||
| 14 | #include <linux/xattr.h> | ||
| 15 | #include <linux/security.h> | ||
| 16 | #include <linux/uaccess.h> | ||
| 17 | #include <linux/sched.h> | ||
| 18 | #include <linux/namei.h> | ||
| 19 | #include "overlayfs.h" | ||
| 20 | |||
| 21 | #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) | ||
| 22 | |||
| 23 | int ovl_copy_xattr(struct dentry *old, struct dentry *new) | ||
| 24 | { | ||
| 25 | ssize_t list_size, size; | ||
| 26 | char *buf, *name, *value; | ||
| 27 | int error; | ||
| 28 | |||
| 29 | if (!old->d_inode->i_op->getxattr || | ||
| 30 | !new->d_inode->i_op->getxattr) | ||
| 31 | return 0; | ||
| 32 | |||
| 33 | list_size = vfs_listxattr(old, NULL, 0); | ||
| 34 | if (list_size <= 0) { | ||
| 35 | if (list_size == -EOPNOTSUPP) | ||
| 36 | return 0; | ||
| 37 | return list_size; | ||
| 38 | } | ||
| 39 | |||
| 40 | buf = kzalloc(list_size, GFP_KERNEL); | ||
| 41 | if (!buf) | ||
| 42 | return -ENOMEM; | ||
| 43 | |||
| 44 | error = -ENOMEM; | ||
| 45 | value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); | ||
| 46 | if (!value) | ||
| 47 | goto out; | ||
| 48 | |||
| 49 | list_size = vfs_listxattr(old, buf, list_size); | ||
| 50 | if (list_size <= 0) { | ||
| 51 | error = list_size; | ||
| 52 | goto out_free_value; | ||
| 53 | } | ||
| 54 | |||
| 55 | for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { | ||
| 56 | size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); | ||
| 57 | if (size <= 0) { | ||
| 58 | error = size; | ||
| 59 | goto out_free_value; | ||
| 60 | } | ||
| 61 | error = vfs_setxattr(new, name, value, size, 0); | ||
| 62 | if (error) | ||
| 63 | goto out_free_value; | ||
| 64 | } | ||
| 65 | |||
| 66 | out_free_value: | ||
| 67 | kfree(value); | ||
| 68 | out: | ||
| 69 | kfree(buf); | ||
| 70 | return error; | ||
| 71 | } | ||
| 72 | |||
| 73 | static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) | ||
| 74 | { | ||
| 75 | struct file *old_file; | ||
| 76 | struct file *new_file; | ||
| 77 | loff_t old_pos = 0; | ||
| 78 | loff_t new_pos = 0; | ||
| 79 | int error = 0; | ||
| 80 | |||
| 81 | if (len == 0) | ||
| 82 | return 0; | ||
| 83 | |||
| 84 | old_file = ovl_path_open(old, O_RDONLY); | ||
| 85 | if (IS_ERR(old_file)) | ||
| 86 | return PTR_ERR(old_file); | ||
| 87 | |||
| 88 | new_file = ovl_path_open(new, O_WRONLY); | ||
| 89 | if (IS_ERR(new_file)) { | ||
| 90 | error = PTR_ERR(new_file); | ||
| 91 | goto out_fput; | ||
| 92 | } | ||
| 93 | |||
| 94 | /* FIXME: copy up sparse files efficiently */ | ||
| 95 | while (len) { | ||
| 96 | size_t this_len = OVL_COPY_UP_CHUNK_SIZE; | ||
| 97 | long bytes; | ||
| 98 | |||
| 99 | if (len < this_len) | ||
| 100 | this_len = len; | ||
| 101 | |||
| 102 | if (signal_pending_state(TASK_KILLABLE, current)) { | ||
| 103 | error = -EINTR; | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | |||
| 107 | bytes = do_splice_direct(old_file, &old_pos, | ||
| 108 | new_file, &new_pos, | ||
| 109 | this_len, SPLICE_F_MOVE); | ||
| 110 | if (bytes <= 0) { | ||
| 111 | error = bytes; | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | WARN_ON(old_pos != new_pos); | ||
| 115 | |||
| 116 | len -= bytes; | ||
| 117 | } | ||
| 118 | |||
| 119 | fput(new_file); | ||
| 120 | out_fput: | ||
| 121 | fput(old_file); | ||
| 122 | return error; | ||
| 123 | } | ||
| 124 | |||
| 125 | static char *ovl_read_symlink(struct dentry *realdentry) | ||
| 126 | { | ||
| 127 | int res; | ||
| 128 | char *buf; | ||
| 129 | struct inode *inode = realdentry->d_inode; | ||
| 130 | mm_segment_t old_fs; | ||
| 131 | |||
| 132 | res = -EINVAL; | ||
| 133 | if (!inode->i_op->readlink) | ||
| 134 | goto err; | ||
| 135 | |||
| 136 | res = -ENOMEM; | ||
| 137 | buf = (char *) __get_free_page(GFP_KERNEL); | ||
| 138 | if (!buf) | ||
| 139 | goto err; | ||
| 140 | |||
| 141 | old_fs = get_fs(); | ||
| 142 | set_fs(get_ds()); | ||
| 143 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
| 144 | res = inode->i_op->readlink(realdentry, | ||
| 145 | (char __user *)buf, PAGE_SIZE - 1); | ||
| 146 | set_fs(old_fs); | ||
| 147 | if (res < 0) { | ||
| 148 | free_page((unsigned long) buf); | ||
| 149 | goto err; | ||
| 150 | } | ||
| 151 | buf[res] = '\0'; | ||
| 152 | |||
| 153 | return buf; | ||
| 154 | |||
| 155 | err: | ||
| 156 | return ERR_PTR(res); | ||
| 157 | } | ||
| 158 | |||
| 159 | static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) | ||
| 160 | { | ||
| 161 | struct iattr attr = { | ||
| 162 | .ia_valid = | ||
| 163 | ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, | ||
| 164 | .ia_atime = stat->atime, | ||
| 165 | .ia_mtime = stat->mtime, | ||
| 166 | }; | ||
| 167 | |||
| 168 | return notify_change(upperdentry, &attr, NULL); | ||
| 169 | } | ||
| 170 | |||
| 171 | int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) | ||
| 172 | { | ||
| 173 | int err = 0; | ||
| 174 | |||
| 175 | if (!S_ISLNK(stat->mode)) { | ||
| 176 | struct iattr attr = { | ||
| 177 | .ia_valid = ATTR_MODE, | ||
| 178 | .ia_mode = stat->mode, | ||
| 179 | }; | ||
| 180 | err = notify_change(upperdentry, &attr, NULL); | ||
| 181 | } | ||
| 182 | if (!err) { | ||
| 183 | struct iattr attr = { | ||
| 184 | .ia_valid = ATTR_UID | ATTR_GID, | ||
| 185 | .ia_uid = stat->uid, | ||
| 186 | .ia_gid = stat->gid, | ||
| 187 | }; | ||
| 188 | err = notify_change(upperdentry, &attr, NULL); | ||
| 189 | } | ||
| 190 | if (!err) | ||
| 191 | ovl_set_timestamps(upperdentry, stat); | ||
| 192 | |||
| 193 | return err; | ||
| 194 | |||
| 195 | } | ||
| 196 | |||
/*
 * Do the actual copy-up with workdir and upperdir locked (caller holds
 * the lock_rename() of both): build a complete copy of the lower object
 * as a temporary file in workdir, then rename it into place in upperdir.
 *
 * @attr, if non-NULL, is an additional setattr to apply to the new copy
 * (the setattr that triggered this copy-up).  @link is the symlink
 * target for S_IFLNK objects.
 */
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
			      struct dentry *dentry, struct path *lowerpath,
			      struct kstat *stat, struct iattr *attr,
			      const char *link)
{
	struct inode *wdir = workdir->d_inode;
	struct inode *udir = upperdir->d_inode;
	struct dentry *newdentry = NULL;
	struct dentry *upper = NULL;
	umode_t mode = stat->mode;
	int err;

	newdentry = ovl_lookup_temp(workdir, dentry);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out;

	/* The (negative) name in upperdir the copy will be renamed to */
	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out1;

	/* Can't properly set mode on creation because of the umask */
	stat->mode &= S_IFMT;
	err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
	stat->mode = mode;
	if (err)
		goto out2;

	if (S_ISREG(stat->mode)) {
		struct path upperpath;
		ovl_path_upper(dentry, &upperpath);
		/* Not copied up yet, so there must be no upper dentry */
		BUG_ON(upperpath.dentry != NULL);
		upperpath.dentry = newdentry;

		err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
		if (err)
			goto out_cleanup;
	}

	err = ovl_copy_xattr(lowerpath->dentry, newdentry);
	if (err)
		goto out_cleanup;

	mutex_lock(&newdentry->d_inode->i_mutex);
	err = ovl_set_attr(newdentry, stat);
	if (!err && attr)
		err = notify_change(newdentry, attr, NULL);
	mutex_unlock(&newdentry->d_inode->i_mutex);
	if (err)
		goto out_cleanup;

	/* Atomically move the finished copy into place */
	err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
	if (err)
		goto out_cleanup;

	/* Hand the reference to the overlay dentry; don't dput it below */
	ovl_dentry_update(dentry, newdentry);
	newdentry = NULL;

	/*
	 * Non-directories become opaque when copied up.
	 */
	if (!S_ISDIR(stat->mode))
		ovl_dentry_set_opaque(dentry, true);
out2:
	dput(upper);
out1:
	dput(newdentry);
out:
	return err;

out_cleanup:
	/* Remove the partially built temporary from workdir */
	ovl_cleanup(wdir, newdentry);
	goto out;
}
| 273 | |||
/*
 * Copy up a single dentry
 *
 * Directory renames only allowed on "pure upper" (already created on
 * upper filesystem, never copied up). Directories which are on lower or
 * are merged may not be renamed. For these -EXDEV is returned and
 * userspace has to deal with it. This means, when copying up a
 * directory we can rely on it and ancestors being stable.
 *
 * Non-directory renames start with copy up of source if necessary. The
 * actual rename will only proceed once the copy up was successful. Copy
 * up uses upper parent i_mutex for exclusion. Since rename can change
 * d_parent it is possible that the copy up will lock the old parent. At
 * that point the file will have already been copied up anyway.
 */
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
		    struct path *lowerpath, struct kstat *stat,
		    struct iattr *attr)
{
	struct dentry *workdir = ovl_workdir(dentry);
	int err;
	struct kstat pstat;
	struct path parentpath;
	struct dentry *upperdir;
	struct dentry *upperdentry;
	const struct cred *old_cred;
	struct cred *override_cred;
	char *link = NULL;

	ovl_path_upper(parent, &parentpath);
	upperdir = parentpath.dentry;

	/* Remember the parent's timestamps so they can be restored below */
	err = vfs_getattr(&parentpath, &pstat);
	if (err)
		return err;

	/* Read the symlink target up front; ovl_copy_up_locked() needs it */
	if (S_ISLNK(stat->mode)) {
		link = ovl_read_symlink(lowerpath->dentry);
		if (IS_ERR(link))
			return PTR_ERR(link);
	}

	err = -ENOMEM;
	override_cred = prepare_creds();
	if (!override_cred)
		goto out_free_link;

	/* Create the copy with the lower file's ownership */
	override_cred->fsuid = stat->uid;
	override_cred->fsgid = stat->gid;
	/*
	 * CAP_SYS_ADMIN for copying up extended attributes
	 * CAP_DAC_OVERRIDE for create
	 * CAP_FOWNER for chmod, timestamp update
	 * CAP_FSETID for chmod
	 * CAP_CHOWN for chown
	 * CAP_MKNOD for mknod
	 */
	cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
	cap_raise(override_cred->cap_effective, CAP_FOWNER);
	cap_raise(override_cred->cap_effective, CAP_FSETID);
	cap_raise(override_cred->cap_effective, CAP_CHOWN);
	cap_raise(override_cred->cap_effective, CAP_MKNOD);
	old_cred = override_creds(override_cred);

	err = -EIO;
	/* A non-NULL return means workdir/upperdir are in an ancestry
	 * relationship, which must not happen */
	if (lock_rename(workdir, upperdir) != NULL) {
		pr_err("overlayfs: failed to lock workdir+upperdir\n");
		goto out_unlock;
	}
	upperdentry = ovl_dentry_upper(dentry);
	if (upperdentry) {
		unlock_rename(workdir, upperdir);
		err = 0;
		/* Raced with another copy-up?  Do the setattr here */
		if (attr) {
			mutex_lock(&upperdentry->d_inode->i_mutex);
			err = notify_change(upperdentry, attr, NULL);
			mutex_unlock(&upperdentry->d_inode->i_mutex);
		}
		goto out_put_cred;
	}

	err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
				 stat, attr, link);
	if (!err) {
		/* Restore timestamps on parent (best effort) */
		ovl_set_timestamps(upperdir, &pstat);
	}
out_unlock:
	unlock_rename(workdir, upperdir);
out_put_cred:
	revert_creds(old_cred);
	put_cred(override_cred);

out_free_link:
	if (link)
		free_page((unsigned long) link);

	return err;
}
| 375 | |||
| 376 | int ovl_copy_up(struct dentry *dentry) | ||
| 377 | { | ||
| 378 | int err; | ||
| 379 | |||
| 380 | err = 0; | ||
| 381 | while (!err) { | ||
| 382 | struct dentry *next; | ||
| 383 | struct dentry *parent; | ||
| 384 | struct path lowerpath; | ||
| 385 | struct kstat stat; | ||
| 386 | enum ovl_path_type type = ovl_path_type(dentry); | ||
| 387 | |||
| 388 | if (type != OVL_PATH_LOWER) | ||
| 389 | break; | ||
| 390 | |||
| 391 | next = dget(dentry); | ||
| 392 | /* find the topmost dentry not yet copied up */ | ||
| 393 | for (;;) { | ||
| 394 | parent = dget_parent(next); | ||
| 395 | |||
| 396 | type = ovl_path_type(parent); | ||
| 397 | if (type != OVL_PATH_LOWER) | ||
| 398 | break; | ||
| 399 | |||
| 400 | dput(next); | ||
| 401 | next = parent; | ||
| 402 | } | ||
| 403 | |||
| 404 | ovl_path_lower(next, &lowerpath); | ||
| 405 | err = vfs_getattr(&lowerpath, &stat); | ||
| 406 | if (!err) | ||
| 407 | err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); | ||
| 408 | |||
| 409 | dput(parent); | ||
| 410 | dput(next); | ||
| 411 | } | ||
| 412 | |||
| 413 | return err; | ||
| 414 | } | ||
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c new file mode 100644 index 000000000000..8ffc4b980f1b --- /dev/null +++ b/fs/overlayfs/dir.c | |||
| @@ -0,0 +1,928 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/namei.h> | ||
| 12 | #include <linux/xattr.h> | ||
| 13 | #include <linux/security.h> | ||
| 14 | #include <linux/cred.h> | ||
| 15 | #include "overlayfs.h" | ||
| 16 | |||
| 17 | void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) | ||
| 18 | { | ||
| 19 | int err; | ||
| 20 | |||
| 21 | dget(wdentry); | ||
| 22 | if (S_ISDIR(wdentry->d_inode->i_mode)) | ||
| 23 | err = ovl_do_rmdir(wdir, wdentry); | ||
| 24 | else | ||
| 25 | err = ovl_do_unlink(wdir, wdentry); | ||
| 26 | dput(wdentry); | ||
| 27 | |||
| 28 | if (err) { | ||
| 29 | pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", | ||
| 30 | wdentry, err); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) | ||
| 35 | { | ||
| 36 | struct dentry *temp; | ||
| 37 | char name[20]; | ||
| 38 | |||
| 39 | snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry); | ||
| 40 | |||
| 41 | temp = lookup_one_len(name, workdir, strlen(name)); | ||
| 42 | if (!IS_ERR(temp) && temp->d_inode) { | ||
| 43 | pr_err("overlayfs: workdir/%s already exists\n", name); | ||
| 44 | dput(temp); | ||
| 45 | temp = ERR_PTR(-EIO); | ||
| 46 | } | ||
| 47 | |||
| 48 | return temp; | ||
| 49 | } | ||
| 50 | |||
| 51 | /* caller holds i_mutex on workdir */ | ||
| 52 | static struct dentry *ovl_whiteout(struct dentry *workdir, | ||
| 53 | struct dentry *dentry) | ||
| 54 | { | ||
| 55 | int err; | ||
| 56 | struct dentry *whiteout; | ||
| 57 | struct inode *wdir = workdir->d_inode; | ||
| 58 | |||
| 59 | whiteout = ovl_lookup_temp(workdir, dentry); | ||
| 60 | if (IS_ERR(whiteout)) | ||
| 61 | return whiteout; | ||
| 62 | |||
| 63 | err = ovl_do_whiteout(wdir, whiteout); | ||
| 64 | if (err) { | ||
| 65 | dput(whiteout); | ||
| 66 | whiteout = ERR_PTR(err); | ||
| 67 | } | ||
| 68 | |||
| 69 | return whiteout; | ||
| 70 | } | ||
| 71 | |||
| 72 | int ovl_create_real(struct inode *dir, struct dentry *newdentry, | ||
| 73 | struct kstat *stat, const char *link, | ||
| 74 | struct dentry *hardlink, bool debug) | ||
| 75 | { | ||
| 76 | int err; | ||
| 77 | |||
| 78 | if (newdentry->d_inode) | ||
| 79 | return -ESTALE; | ||
| 80 | |||
| 81 | if (hardlink) { | ||
| 82 | err = ovl_do_link(hardlink, dir, newdentry, debug); | ||
| 83 | } else { | ||
| 84 | switch (stat->mode & S_IFMT) { | ||
| 85 | case S_IFREG: | ||
| 86 | err = ovl_do_create(dir, newdentry, stat->mode, debug); | ||
| 87 | break; | ||
| 88 | |||
| 89 | case S_IFDIR: | ||
| 90 | err = ovl_do_mkdir(dir, newdentry, stat->mode, debug); | ||
| 91 | break; | ||
| 92 | |||
| 93 | case S_IFCHR: | ||
| 94 | case S_IFBLK: | ||
| 95 | case S_IFIFO: | ||
| 96 | case S_IFSOCK: | ||
| 97 | err = ovl_do_mknod(dir, newdentry, | ||
| 98 | stat->mode, stat->rdev, debug); | ||
| 99 | break; | ||
| 100 | |||
| 101 | case S_IFLNK: | ||
| 102 | err = ovl_do_symlink(dir, newdentry, link, debug); | ||
| 103 | break; | ||
| 104 | |||
| 105 | default: | ||
| 106 | err = -EPERM; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | if (!err && WARN_ON(!newdentry->d_inode)) { | ||
| 110 | /* | ||
| 111 | * Not quite sure if non-instantiated dentry is legal or not. | ||
| 112 | * VFS doesn't seem to care so check and warn here. | ||
| 113 | */ | ||
| 114 | err = -ENOENT; | ||
| 115 | } | ||
| 116 | return err; | ||
| 117 | } | ||
| 118 | |||
/* Set the opaque xattr on @upperdentry (value "y", create-only flags 0) */
static int ovl_set_opaque(struct dentry *upperdentry)
{
	return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
}
| 123 | |||
| 124 | static void ovl_remove_opaque(struct dentry *upperdentry) | ||
| 125 | { | ||
| 126 | int err; | ||
| 127 | |||
| 128 | err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); | ||
| 129 | if (err) { | ||
| 130 | pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", | ||
| 131 | upperdentry->d_name.name, err); | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 136 | struct kstat *stat) | ||
| 137 | { | ||
| 138 | int err; | ||
| 139 | enum ovl_path_type type; | ||
| 140 | struct path realpath; | ||
| 141 | |||
| 142 | type = ovl_path_real(dentry, &realpath); | ||
| 143 | err = vfs_getattr(&realpath, stat); | ||
| 144 | if (err) | ||
| 145 | return err; | ||
| 146 | |||
| 147 | stat->dev = dentry->d_sb->s_dev; | ||
| 148 | stat->ino = dentry->d_inode->i_ino; | ||
| 149 | |||
| 150 | /* | ||
| 151 | * It's probably not worth it to count subdirs to get the | ||
| 152 | * correct link count. nlink=1 seems to pacify 'find' and | ||
| 153 | * other utilities. | ||
| 154 | */ | ||
| 155 | if (type == OVL_PATH_MERGE) | ||
| 156 | stat->nlink = 1; | ||
| 157 | |||
| 158 | return 0; | ||
| 159 | } | ||
| 160 | |||
/*
 * Create @dentry directly in the upper directory (no whiteout to hide,
 * so no workdir dance is needed) and instantiate the overlay @inode.
 */
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
			    struct kstat *stat, const char *link,
			    struct dentry *hardlink)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *newdentry;
	int err;

	mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
				   dentry->d_name.len);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_unlock;
	err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
	if (err)
		goto out_dput;

	ovl_dentry_version_inc(dentry->d_parent);
	ovl_dentry_update(dentry, newdentry);
	ovl_copyattr(newdentry->d_inode, inode);
	d_instantiate(dentry, inode);
	/* Reference handed to ovl_dentry_update(); skip the dput below */
	newdentry = NULL;
out_dput:
	dput(newdentry);
out_unlock:
	mutex_unlock(&udir->i_mutex);
	return err;
}
| 191 | |||
| 192 | static int ovl_lock_rename_workdir(struct dentry *workdir, | ||
| 193 | struct dentry *upperdir) | ||
| 194 | { | ||
| 195 | /* Workdir should not be the same as upperdir */ | ||
| 196 | if (workdir == upperdir) | ||
| 197 | goto err; | ||
| 198 | |||
| 199 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
| 200 | if (lock_rename(workdir, upperdir) != NULL) | ||
| 201 | goto err_unlock; | ||
| 202 | |||
| 203 | return 0; | ||
| 204 | |||
| 205 | err_unlock: | ||
| 206 | unlock_rename(workdir, upperdir); | ||
| 207 | err: | ||
| 208 | pr_err("overlayfs: failed to lock workdir+upperdir\n"); | ||
| 209 | return -EIO; | ||
| 210 | } | ||
| 211 | |||
/*
 * Replace the upper directory of @dentry (which is logically empty: it
 * contains only the whiteouts collected in @list) with a freshly built
 * opaque directory from workdir, via RENAME_EXCHANGE.  The whiteouts
 * and the old directory are then cleaned up in workdir.
 *
 * Returns the new (opaque) directory dentry, or an ERR_PTR.
 */
static struct dentry *ovl_clear_empty(struct dentry *dentry,
				      struct list_head *list)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir = workdir->d_inode;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct path upperpath;
	struct dentry *upper;
	struct dentry *opaquedir;
	struct kstat stat;
	int err;

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out;

	ovl_path_upper(dentry, &upperpath);
	err = vfs_getattr(&upperpath, &stat);
	if (err)
		goto out_unlock;

	/* Re-check under lock: still a directory in the expected parent? */
	err = -ESTALE;
	if (!S_ISDIR(stat.mode))
		goto out_unlock;
	upper = upperpath.dentry;
	if (upper->d_parent->d_inode != udir)
		goto out_unlock;

	opaquedir = ovl_lookup_temp(workdir, dentry);
	err = PTR_ERR(opaquedir);
	if (IS_ERR(opaquedir))
		goto out_unlock;

	err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
	if (err)
		goto out_dput;

	err = ovl_copy_xattr(upper, opaquedir);
	if (err)
		goto out_cleanup;

	err = ovl_set_opaque(opaquedir);
	if (err)
		goto out_cleanup;

	mutex_lock(&opaquedir->d_inode->i_mutex);
	err = ovl_set_attr(opaquedir, &stat);
	mutex_unlock(&opaquedir->d_inode->i_mutex);
	if (err)
		goto out_cleanup;

	/* Swap the old directory (now in workdir) with the opaque one */
	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
	if (err)
		goto out_cleanup;

	ovl_cleanup_whiteouts(upper, list);
	ovl_cleanup(wdir, upper);
	unlock_rename(workdir, upperdir);

	/* dentry's upper doesn't match now, get rid of it */
	d_drop(dentry);

	return opaquedir;

out_cleanup:
	ovl_cleanup(wdir, opaquedir);
out_dput:
	dput(opaquedir);
out_unlock:
	unlock_rename(workdir, upperdir);
out:
	return ERR_PTR(err);
}
| 286 | |||
| 287 | static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) | ||
| 288 | { | ||
| 289 | int err; | ||
| 290 | struct dentry *ret = NULL; | ||
| 291 | LIST_HEAD(list); | ||
| 292 | |||
| 293 | err = ovl_check_empty_dir(dentry, &list); | ||
| 294 | if (err) | ||
| 295 | ret = ERR_PTR(err); | ||
| 296 | else { | ||
| 297 | /* | ||
| 298 | * If no upperdentry then skip clearing whiteouts. | ||
| 299 | * | ||
| 300 | * Can race with copy-up, since we don't hold the upperdir | ||
| 301 | * mutex. Doesn't matter, since copy-up can't create a | ||
| 302 | * non-empty directory from an empty one. | ||
| 303 | */ | ||
| 304 | if (ovl_dentry_upper(dentry)) | ||
| 305 | ret = ovl_clear_empty(dentry, &list); | ||
| 306 | } | ||
| 307 | |||
| 308 | ovl_cache_free(&list); | ||
| 309 | |||
| 310 | return ret; | ||
| 311 | } | ||
| 312 | |||
/*
 * Create @dentry where a whiteout currently sits in the upper layer:
 * build the new object in workdir, then rename it over the whiteout.
 * New directories are made opaque first (to hide any lower entries) and
 * swapped in with RENAME_EXCHANGE so the displaced whiteout can be
 * cleaned up from workdir.
 */
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
				    struct kstat *stat, const char *link,
				    struct dentry *hardlink)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir = workdir->d_inode;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *upper;
	struct dentry *newdentry;
	int err;

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out;

	newdentry = ovl_lookup_temp(workdir, dentry);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_unlock;

	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_dput;

	err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
	if (err)
		goto out_dput2;

	if (S_ISDIR(stat->mode)) {
		err = ovl_set_opaque(newdentry);
		if (err)
			goto out_cleanup;

		err = ovl_do_rename(wdir, newdentry, udir, upper,
				    RENAME_EXCHANGE);
		if (err)
			goto out_cleanup;

		/* Remove the whiteout, now exchanged into workdir */
		ovl_cleanup(wdir, upper);
	} else {
		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
		if (err)
			goto out_cleanup;
	}
	ovl_dentry_version_inc(dentry->d_parent);
	ovl_dentry_update(dentry, newdentry);
	ovl_copyattr(newdentry->d_inode, inode);
	d_instantiate(dentry, inode);
	/* Reference handed to ovl_dentry_update(); skip the dput below */
	newdentry = NULL;
out_dput2:
	dput(upper);
out_dput:
	dput(newdentry);
out_unlock:
	unlock_rename(workdir, upperdir);
out:
	return err;

out_cleanup:
	ovl_cleanup(wdir, newdentry);
	goto out_dput2;
}
| 378 | |||
/*
 * Common implementation for create/mkdir/mknod/symlink/link: copy up
 * the parent, then create either directly in upperdir or, if a whiteout
 * is in the way (the dentry is opaque), via the workdir with elevated
 * credentials.  On success the new overlay inode is consumed by
 * d_instantiate(); on failure it is dropped here.
 */
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
			      const char *link, struct dentry *hardlink)
{
	int err;
	struct inode *inode;
	struct kstat stat = {
		.mode = mode,
		.rdev = rdev,
	};

	err = -ENOMEM;
	inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
	if (!inode)
		goto out;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_iput;

	if (!ovl_dentry_is_opaque(dentry)) {
		err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
	} else {
		const struct cred *old_cred;
		struct cred *override_cred;

		err = -ENOMEM;
		override_cred = prepare_creds();
		if (!override_cred)
			goto out_iput;

		/*
		 * CAP_SYS_ADMIN for setting opaque xattr
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 */
		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(override_cred->cap_effective, CAP_FOWNER);
		old_cred = override_creds(override_cred);

		err = ovl_create_over_whiteout(dentry, inode, &stat, link,
					       hardlink);

		revert_creds(old_cred);
		put_cred(override_cred);
	}

	/* On success the inode reference now belongs to the dentry */
	if (!err)
		inode = NULL;
out_iput:
	iput(inode);
out:
	return err;
}
| 433 | |||
| 434 | static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, | ||
| 435 | const char *link) | ||
| 436 | { | ||
| 437 | int err; | ||
| 438 | |||
| 439 | err = ovl_want_write(dentry); | ||
| 440 | if (!err) { | ||
| 441 | err = ovl_create_or_link(dentry, mode, rdev, link, NULL); | ||
| 442 | ovl_drop_write(dentry); | ||
| 443 | } | ||
| 444 | |||
| 445 | return err; | ||
| 446 | } | ||
| 447 | |||
/* ->create: regular file; only permission bits of @mode are kept (@excl is
 * unused since the dentry is known negative) */
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		      bool excl)
{
	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
}
| 453 | |||
/* ->mkdir: directory; only permission bits of @mode are kept */
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
}
| 458 | |||
/* ->mknod: special file, except the overlay whiteout device itself */
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
		     dev_t rdev)
{
	/* Don't allow creation of "whiteout" on overlay */
	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
		return -EPERM;

	return ovl_create_object(dentry, mode, rdev, NULL);
}
| 468 | |||
/* ->symlink: create a symlink with target @link */
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
		       const char *link)
{
	return ovl_create_object(dentry, S_IFLNK, 0, link);
}
| 474 | |||
| 475 | static int ovl_link(struct dentry *old, struct inode *newdir, | ||
| 476 | struct dentry *new) | ||
| 477 | { | ||
| 478 | int err; | ||
| 479 | struct dentry *upper; | ||
| 480 | |||
| 481 | err = ovl_want_write(old); | ||
| 482 | if (err) | ||
| 483 | goto out; | ||
| 484 | |||
| 485 | err = ovl_copy_up(old); | ||
| 486 | if (err) | ||
| 487 | goto out_drop_write; | ||
| 488 | |||
| 489 | upper = ovl_dentry_upper(old); | ||
| 490 | err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper); | ||
| 491 | |||
| 492 | out_drop_write: | ||
| 493 | ovl_drop_write(old); | ||
| 494 | out: | ||
| 495 | return err; | ||
| 496 | } | ||
| 497 | |||
/*
 * Remove @dentry by renaming a whiteout from workdir over its name in
 * upperdir.  If there is no upper object, the whiteout simply takes the
 * (negative) name; otherwise the existing object is displaced into
 * workdir (RENAME_EXCHANGE for directories) and cleaned up there.
 * Directories are first checked for emptiness and their whiteouts
 * cleared via ovl_check_empty_and_clear().
 */
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir = workdir->d_inode;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *whiteout;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;

	if (is_dir) {
		opaquedir = ovl_check_empty_and_clear(dentry);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir))
			goto out;
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out_dput;

	whiteout = ovl_whiteout(workdir, dentry);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto out_unlock;

	upper = ovl_dentry_upper(dentry);
	if (!upper) {
		/* Lower-only: whiteout just takes the negative upper name */
		upper = lookup_one_len(dentry->d_name.name, upperdir,
				       dentry->d_name.len);
		err = PTR_ERR(upper);
		if (IS_ERR(upper))
			goto kill_whiteout;

		err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
		dput(upper);
		if (err)
			goto kill_whiteout;
	} else {
		int flags = 0;

		/* ovl_clear_empty() replaced the upper directory */
		if (opaquedir)
			upper = opaquedir;
		err = -ESTALE;
		if (upper->d_parent != upperdir)
			goto kill_whiteout;

		if (is_dir)
			flags |= RENAME_EXCHANGE;

		err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
		if (err)
			goto kill_whiteout;

		/* Remove the old directory, now exchanged into workdir */
		if (is_dir)
			ovl_cleanup(wdir, upper);
	}
	ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
	d_drop(dentry);
	dput(whiteout);
out_unlock:
	unlock_rename(workdir, upperdir);
out_dput:
	dput(opaquedir);
out:
	return err;

kill_whiteout:
	/* Remove the unused whiteout from workdir */
	ovl_cleanup(wdir, whiteout);
	goto out_d_drop;
}
| 571 | |||
/*
 * Remove a pure-upper @dentry: plain rmdir/unlink on the upper
 * filesystem, no whiteout needed.  Fails with -ESTALE if the upper
 * dentry has moved out of the locked parent.
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *dir = upperdir->d_inode;
	struct dentry *upper = ovl_dentry_upper(dentry);
	int err;

	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	err = -ESTALE;
	if (upper->d_parent == upperdir) {
		/* Don't let d_delete() think it can reset d_inode */
		dget(upper);
		if (is_dir)
			err = vfs_rmdir(dir, upper);
		else
			err = vfs_unlink(dir, upper, NULL);
		dput(upper);
		ovl_dentry_version_inc(dentry->d_parent);
	}

	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	d_drop(dentry);
	mutex_unlock(&dir->i_mutex);

	return err;
}
| 603 | |||
| 604 | static inline int ovl_check_sticky(struct dentry *dentry) | ||
| 605 | { | ||
| 606 | struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode; | ||
| 607 | struct inode *inode = ovl_dentry_real(dentry)->d_inode; | ||
| 608 | |||
| 609 | if (check_sticky(dir, inode)) | ||
| 610 | return -EPERM; | ||
| 611 | |||
| 612 | return 0; | ||
| 613 | } | ||
| 614 | |||
/*
 * Common unlink/rmdir implementation: copy up the parent, then either
 * remove a pure-upper entry directly or whiteout it (with elevated
 * credentials for the workdir manipulation).
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
	enum ovl_path_type type;
	int err;

	err = ovl_check_sticky(dentry);
	if (err)
		goto out;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_drop_write;

	type = ovl_path_type(dentry);
	if (type == OVL_PATH_PURE_UPPER) {
		err = ovl_remove_upper(dentry, is_dir);
	} else {
		const struct cred *old_cred;
		struct cred *override_cred;

		err = -ENOMEM;
		override_cred = prepare_creds();
		if (!override_cred)
			goto out_drop_write;

		/*
		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 * CAP_FSETID for chmod of opaque dir
		 * CAP_CHOWN for chown of opaque dir
		 */
		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(override_cred->cap_effective, CAP_FOWNER);
		cap_raise(override_cred->cap_effective, CAP_FSETID);
		cap_raise(override_cred->cap_effective, CAP_CHOWN);
		old_cred = override_creds(override_cred);

		err = ovl_remove_and_whiteout(dentry, is_dir);

		revert_creds(old_cred);
		put_cred(override_cred);
	}
out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}
| 668 | |||
/* ->unlink: remove a non-directory (see ovl_do_remove()) */
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
{
	return ovl_do_remove(dentry, false);
}
| 673 | |||
/* ->rmdir: remove a directory (see ovl_do_remove()) */
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
{
	return ovl_do_remove(dentry, true);
}
| 678 | |||
| 679 | static int ovl_rename2(struct inode *olddir, struct dentry *old, | ||
| 680 | struct inode *newdir, struct dentry *new, | ||
| 681 | unsigned int flags) | ||
| 682 | { | ||
| 683 | int err; | ||
| 684 | enum ovl_path_type old_type; | ||
| 685 | enum ovl_path_type new_type; | ||
| 686 | struct dentry *old_upperdir; | ||
| 687 | struct dentry *new_upperdir; | ||
| 688 | struct dentry *olddentry; | ||
| 689 | struct dentry *newdentry; | ||
| 690 | struct dentry *trap; | ||
| 691 | bool old_opaque; | ||
| 692 | bool new_opaque; | ||
| 693 | bool new_create = false; | ||
| 694 | bool cleanup_whiteout = false; | ||
| 695 | bool overwrite = !(flags & RENAME_EXCHANGE); | ||
| 696 | bool is_dir = S_ISDIR(old->d_inode->i_mode); | ||
| 697 | bool new_is_dir = false; | ||
| 698 | struct dentry *opaquedir = NULL; | ||
| 699 | const struct cred *old_cred = NULL; | ||
| 700 | struct cred *override_cred = NULL; | ||
| 701 | |||
| 702 | err = -EINVAL; | ||
| 703 | if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) | ||
| 704 | goto out; | ||
| 705 | |||
| 706 | flags &= ~RENAME_NOREPLACE; | ||
| 707 | |||
| 708 | err = ovl_check_sticky(old); | ||
| 709 | if (err) | ||
| 710 | goto out; | ||
| 711 | |||
| 712 | /* Don't copy up directory trees */ | ||
| 713 | old_type = ovl_path_type(old); | ||
| 714 | err = -EXDEV; | ||
| 715 | if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir) | ||
| 716 | goto out; | ||
| 717 | |||
| 718 | if (new->d_inode) { | ||
| 719 | err = ovl_check_sticky(new); | ||
| 720 | if (err) | ||
| 721 | goto out; | ||
| 722 | |||
| 723 | if (S_ISDIR(new->d_inode->i_mode)) | ||
| 724 | new_is_dir = true; | ||
| 725 | |||
| 726 | new_type = ovl_path_type(new); | ||
| 727 | err = -EXDEV; | ||
| 728 | if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) | ||
| 729 | goto out; | ||
| 730 | |||
| 731 | err = 0; | ||
| 732 | if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { | ||
| 733 | if (ovl_dentry_lower(old)->d_inode == | ||
| 734 | ovl_dentry_lower(new)->d_inode) | ||
| 735 | goto out; | ||
| 736 | } | ||
| 737 | if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { | ||
| 738 | if (ovl_dentry_upper(old)->d_inode == | ||
| 739 | ovl_dentry_upper(new)->d_inode) | ||
| 740 | goto out; | ||
| 741 | } | ||
| 742 | } else { | ||
| 743 | if (ovl_dentry_is_opaque(new)) | ||
| 744 | new_type = OVL_PATH_UPPER; | ||
| 745 | else | ||
| 746 | new_type = OVL_PATH_PURE_UPPER; | ||
| 747 | } | ||
| 748 | |||
| 749 | err = ovl_want_write(old); | ||
| 750 | if (err) | ||
| 751 | goto out; | ||
| 752 | |||
| 753 | err = ovl_copy_up(old); | ||
| 754 | if (err) | ||
| 755 | goto out_drop_write; | ||
| 756 | |||
| 757 | err = ovl_copy_up(new->d_parent); | ||
| 758 | if (err) | ||
| 759 | goto out_drop_write; | ||
| 760 | if (!overwrite) { | ||
| 761 | err = ovl_copy_up(new); | ||
| 762 | if (err) | ||
| 763 | goto out_drop_write; | ||
| 764 | } | ||
| 765 | |||
| 766 | old_opaque = old_type != OVL_PATH_PURE_UPPER; | ||
| 767 | new_opaque = new_type != OVL_PATH_PURE_UPPER; | ||
| 768 | |||
| 769 | if (old_opaque || new_opaque) { | ||
| 770 | err = -ENOMEM; | ||
| 771 | override_cred = prepare_creds(); | ||
| 772 | if (!override_cred) | ||
| 773 | goto out_drop_write; | ||
| 774 | |||
| 775 | /* | ||
| 776 | * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir | ||
| 777 | * CAP_DAC_OVERRIDE for create in workdir | ||
| 778 | * CAP_FOWNER for removing whiteout from sticky dir | ||
| 779 | * CAP_FSETID for chmod of opaque dir | ||
| 780 | * CAP_CHOWN for chown of opaque dir | ||
| 781 | */ | ||
| 782 | cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); | ||
| 783 | cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); | ||
| 784 | cap_raise(override_cred->cap_effective, CAP_FOWNER); | ||
| 785 | cap_raise(override_cred->cap_effective, CAP_FSETID); | ||
| 786 | cap_raise(override_cred->cap_effective, CAP_CHOWN); | ||
| 787 | old_cred = override_creds(override_cred); | ||
| 788 | } | ||
| 789 | |||
| 790 | if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { | ||
| 791 | opaquedir = ovl_check_empty_and_clear(new); | ||
| 792 | err = PTR_ERR(opaquedir); | ||
| 793 | if (IS_ERR(opaquedir)) { | ||
| 794 | opaquedir = NULL; | ||
| 795 | goto out_revert_creds; | ||
| 796 | } | ||
| 797 | } | ||
| 798 | |||
| 799 | if (overwrite) { | ||
| 800 | if (old_opaque) { | ||
| 801 | if (new->d_inode || !new_opaque) { | ||
| 802 | /* Whiteout source */ | ||
| 803 | flags |= RENAME_WHITEOUT; | ||
| 804 | } else { | ||
| 805 | /* Switch whiteouts */ | ||
| 806 | flags |= RENAME_EXCHANGE; | ||
| 807 | } | ||
| 808 | } else if (is_dir && !new->d_inode && new_opaque) { | ||
| 809 | flags |= RENAME_EXCHANGE; | ||
| 810 | cleanup_whiteout = true; | ||
| 811 | } | ||
| 812 | } | ||
| 813 | |||
| 814 | old_upperdir = ovl_dentry_upper(old->d_parent); | ||
| 815 | new_upperdir = ovl_dentry_upper(new->d_parent); | ||
| 816 | |||
| 817 | trap = lock_rename(new_upperdir, old_upperdir); | ||
| 818 | |||
| 819 | olddentry = ovl_dentry_upper(old); | ||
| 820 | newdentry = ovl_dentry_upper(new); | ||
| 821 | if (newdentry) { | ||
| 822 | if (opaquedir) { | ||
| 823 | newdentry = opaquedir; | ||
| 824 | opaquedir = NULL; | ||
| 825 | } else { | ||
| 826 | dget(newdentry); | ||
| 827 | } | ||
| 828 | } else { | ||
| 829 | new_create = true; | ||
| 830 | newdentry = lookup_one_len(new->d_name.name, new_upperdir, | ||
| 831 | new->d_name.len); | ||
| 832 | err = PTR_ERR(newdentry); | ||
| 833 | if (IS_ERR(newdentry)) | ||
| 834 | goto out_unlock; | ||
| 835 | } | ||
| 836 | |||
| 837 | err = -ESTALE; | ||
| 838 | if (olddentry->d_parent != old_upperdir) | ||
| 839 | goto out_dput; | ||
| 840 | if (newdentry->d_parent != new_upperdir) | ||
| 841 | goto out_dput; | ||
| 842 | if (olddentry == trap) | ||
| 843 | goto out_dput; | ||
| 844 | if (newdentry == trap) | ||
| 845 | goto out_dput; | ||
| 846 | |||
| 847 | if (is_dir && !old_opaque && new_opaque) { | ||
| 848 | err = ovl_set_opaque(olddentry); | ||
| 849 | if (err) | ||
| 850 | goto out_dput; | ||
| 851 | } | ||
| 852 | if (!overwrite && new_is_dir && old_opaque && !new_opaque) { | ||
| 853 | err = ovl_set_opaque(newdentry); | ||
| 854 | if (err) | ||
| 855 | goto out_dput; | ||
| 856 | } | ||
| 857 | |||
| 858 | if (old_opaque || new_opaque) { | ||
| 859 | err = ovl_do_rename(old_upperdir->d_inode, olddentry, | ||
| 860 | new_upperdir->d_inode, newdentry, | ||
| 861 | flags); | ||
| 862 | } else { | ||
| 863 | /* No debug for the plain case */ | ||
| 864 | BUG_ON(flags & ~RENAME_EXCHANGE); | ||
| 865 | err = vfs_rename(old_upperdir->d_inode, olddentry, | ||
| 866 | new_upperdir->d_inode, newdentry, | ||
| 867 | NULL, flags); | ||
| 868 | } | ||
| 869 | |||
| 870 | if (err) { | ||
| 871 | if (is_dir && !old_opaque && new_opaque) | ||
| 872 | ovl_remove_opaque(olddentry); | ||
| 873 | if (!overwrite && new_is_dir && old_opaque && !new_opaque) | ||
| 874 | ovl_remove_opaque(newdentry); | ||
| 875 | goto out_dput; | ||
| 876 | } | ||
| 877 | |||
| 878 | if (is_dir && old_opaque && !new_opaque) | ||
| 879 | ovl_remove_opaque(olddentry); | ||
| 880 | if (!overwrite && new_is_dir && !old_opaque && new_opaque) | ||
| 881 | ovl_remove_opaque(newdentry); | ||
| 882 | |||
| 883 | if (old_opaque != new_opaque) { | ||
| 884 | ovl_dentry_set_opaque(old, new_opaque); | ||
| 885 | if (!overwrite) | ||
| 886 | ovl_dentry_set_opaque(new, old_opaque); | ||
| 887 | } | ||
| 888 | |||
| 889 | if (cleanup_whiteout) | ||
| 890 | ovl_cleanup(old_upperdir->d_inode, newdentry); | ||
| 891 | |||
| 892 | ovl_dentry_version_inc(old->d_parent); | ||
| 893 | ovl_dentry_version_inc(new->d_parent); | ||
| 894 | |||
| 895 | out_dput: | ||
| 896 | dput(newdentry); | ||
| 897 | out_unlock: | ||
| 898 | unlock_rename(new_upperdir, old_upperdir); | ||
| 899 | out_revert_creds: | ||
| 900 | if (old_opaque || new_opaque) { | ||
| 901 | revert_creds(old_cred); | ||
| 902 | put_cred(override_cred); | ||
| 903 | } | ||
| 904 | out_drop_write: | ||
| 905 | ovl_drop_write(old); | ||
| 906 | out: | ||
| 907 | dput(opaquedir); | ||
| 908 | return err; | ||
| 909 | } | ||
| 910 | |||
/* Inode operations for overlay directories (merged and pure upper). */
const struct inode_operations ovl_dir_inode_operations = {
	.lookup		= ovl_lookup,
	.mkdir		= ovl_mkdir,
	.symlink	= ovl_symlink,
	.unlink		= ovl_unlink,
	.rmdir		= ovl_rmdir,
	.rename2	= ovl_rename2,
	.link		= ovl_link,
	.setattr	= ovl_setattr,
	.create		= ovl_create,
	.mknod		= ovl_mknod,
	.permission	= ovl_permission,
	.getattr	= ovl_dir_getattr,
	.setxattr	= ovl_setxattr,
	.getxattr	= ovl_getxattr,
	.listxattr	= ovl_listxattr,
	.removexattr	= ovl_removexattr,
};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c new file mode 100644 index 000000000000..07d74b24913b --- /dev/null +++ b/fs/overlayfs/inode.c | |||
| @@ -0,0 +1,434 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/xattr.h> | ||
| 13 | #include "overlayfs.h" | ||
| 14 | |||
| 15 | static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, | ||
| 16 | bool no_data) | ||
| 17 | { | ||
| 18 | int err; | ||
| 19 | struct dentry *parent; | ||
| 20 | struct kstat stat; | ||
| 21 | struct path lowerpath; | ||
| 22 | |||
| 23 | parent = dget_parent(dentry); | ||
| 24 | err = ovl_copy_up(parent); | ||
| 25 | if (err) | ||
| 26 | goto out_dput_parent; | ||
| 27 | |||
| 28 | ovl_path_lower(dentry, &lowerpath); | ||
| 29 | err = vfs_getattr(&lowerpath, &stat); | ||
| 30 | if (err) | ||
| 31 | goto out_dput_parent; | ||
| 32 | |||
| 33 | if (no_data) | ||
| 34 | stat.size = 0; | ||
| 35 | |||
| 36 | err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); | ||
| 37 | |||
| 38 | out_dput_parent: | ||
| 39 | dput(parent); | ||
| 40 | return err; | ||
| 41 | } | ||
| 42 | |||
| 43 | int ovl_setattr(struct dentry *dentry, struct iattr *attr) | ||
| 44 | { | ||
| 45 | int err; | ||
| 46 | struct dentry *upperdentry; | ||
| 47 | |||
| 48 | err = ovl_want_write(dentry); | ||
| 49 | if (err) | ||
| 50 | goto out; | ||
| 51 | |||
| 52 | upperdentry = ovl_dentry_upper(dentry); | ||
| 53 | if (upperdentry) { | ||
| 54 | mutex_lock(&upperdentry->d_inode->i_mutex); | ||
| 55 | err = notify_change(upperdentry, attr, NULL); | ||
| 56 | mutex_unlock(&upperdentry->d_inode->i_mutex); | ||
| 57 | } else { | ||
| 58 | err = ovl_copy_up_last(dentry, attr, false); | ||
| 59 | } | ||
| 60 | ovl_drop_write(dentry); | ||
| 61 | out: | ||
| 62 | return err; | ||
| 63 | } | ||
| 64 | |||
| 65 | static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 66 | struct kstat *stat) | ||
| 67 | { | ||
| 68 | struct path realpath; | ||
| 69 | |||
| 70 | ovl_path_real(dentry, &realpath); | ||
| 71 | return vfs_getattr(&realpath, stat); | ||
| 72 | } | ||
| 73 | |||
/*
 * ->permission: check access against the backing (real) inode.
 *
 * Must be RCU-walk safe for directories (their ovl_entry hangs off
 * i_private); non-directories need a dentry alias, which cannot be
 * looked up in RCU mode, so MAY_NOT_BLOCK returns -ECHILD there.
 */
int ovl_permission(struct inode *inode, int mask)
{
	struct ovl_entry *oe;
	struct dentry *alias = NULL;
	struct inode *realinode;
	struct dentry *realdentry;
	bool is_upper;
	int err;

	if (S_ISDIR(inode->i_mode)) {
		oe = inode->i_private;
	} else if (mask & MAY_NOT_BLOCK) {
		return -ECHILD;
	} else {
		/*
		 * For non-directories find an alias and get the info
		 * from there.
		 */
		alias = d_find_any_alias(inode);
		if (WARN_ON(!alias))
			return -ENOENT;

		oe = alias->d_fsdata;
	}

	realdentry = ovl_entry_real(oe, &is_upper);

	/* Careful in RCU walk mode */
	realinode = ACCESS_ONCE(realdentry->d_inode);
	if (!realinode) {
		/* only expected while racing in RCU mode */
		WARN_ON(!(mask & MAY_NOT_BLOCK));
		err = -ENOENT;
		goto out_dput;
	}

	if (mask & MAY_WRITE) {
		umode_t mode = realinode->i_mode;

		/*
		 * Writes will always be redirected to upper layer, so
		 * ignore lower layer being read-only.
		 *
		 * If the overlay itself is read-only then proceed
		 * with the permission check, don't return EROFS.
		 * This will only happen if this is the lower layer of
		 * another overlayfs.
		 *
		 * If upper fs becomes read-only after the overlay was
		 * constructed return EROFS to prevent modification of
		 * upper layer.
		 */
		err = -EROFS;
		if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
			goto out_dput;
	}

	err = __inode_permission(realinode, mask);
out_dput:
	dput(alias);	/* dput(NULL) is a no-op for the directory case */
	return err;
}
| 136 | |||
| 137 | |||
/* State carried from ovl_follow_link() to ovl_put_link(). */
struct ovl_link_data {
	struct dentry *realdentry;	/* real symlink dentry followed */
	void *cookie;			/* real fs's follow_link() cookie */
};
| 142 | |||
/*
 * ->follow_link: delegate to the real symlink's inode operation.
 * If the real fs has a put_link method, wrap its cookie so that
 * ovl_put_link() can forward to the right dentry later; on allocation
 * failure the real put_link is called immediately to avoid a leak.
 */
static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	void *ret;
	struct dentry *realdentry;
	struct inode *realinode;

	realdentry = ovl_dentry_real(dentry);
	realinode = realdentry->d_inode;

	if (WARN_ON(!realinode->i_op->follow_link))
		return ERR_PTR(-EPERM);

	ret = realinode->i_op->follow_link(realdentry, nd);
	if (IS_ERR(ret))
		return ret;

	if (realinode->i_op->put_link) {
		struct ovl_link_data *data;

		data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
		if (!data) {
			realinode->i_op->put_link(realdentry, nd, ret);
			return ERR_PTR(-ENOMEM);
		}
		data->realdentry = realdentry;
		data->cookie = ret;

		return data;
	} else {
		/* NULL cookie tells ovl_put_link() there is nothing to undo */
		return NULL;
	}
}
| 175 | |||
| 176 | static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) | ||
| 177 | { | ||
| 178 | struct inode *realinode; | ||
| 179 | struct ovl_link_data *data = c; | ||
| 180 | |||
| 181 | if (!data) | ||
| 182 | return; | ||
| 183 | |||
| 184 | realinode = data->realdentry->d_inode; | ||
| 185 | realinode->i_op->put_link(data->realdentry, nd, data->cookie); | ||
| 186 | kfree(data); | ||
| 187 | } | ||
| 188 | |||
| 189 | static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | ||
| 190 | { | ||
| 191 | struct path realpath; | ||
| 192 | struct inode *realinode; | ||
| 193 | |||
| 194 | ovl_path_real(dentry, &realpath); | ||
| 195 | realinode = realpath.dentry->d_inode; | ||
| 196 | |||
| 197 | if (!realinode->i_op->readlink) | ||
| 198 | return -EINVAL; | ||
| 199 | |||
| 200 | touch_atime(&realpath); | ||
| 201 | |||
| 202 | return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); | ||
| 203 | } | ||
| 204 | |||
| 205 | |||
/*
 * Is @name in overlayfs's private xattr namespace ("trusted.overlay.*")?
 * These are implementation details (whiteout/opaque markers) and must be
 * hidden from and protected against userspace.
 *
 * Fix: the prefix "trusted.overlay." is 16 characters long, but the old
 * code compared only 14 ("trusted.overla"), so unrelated names such as
 * "trusted.overlaid" were wrongly classified as private.  Derive the
 * length from the literal itself so they can never drift apart again.
 */
static bool ovl_is_private_xattr(const char *name)
{
	return strncmp(name, "trusted.overlay.",
		       sizeof("trusted.overlay.") - 1) == 0;
}
| 210 | |||
| 211 | int ovl_setxattr(struct dentry *dentry, const char *name, | ||
| 212 | const void *value, size_t size, int flags) | ||
| 213 | { | ||
| 214 | int err; | ||
| 215 | struct dentry *upperdentry; | ||
| 216 | |||
| 217 | err = ovl_want_write(dentry); | ||
| 218 | if (err) | ||
| 219 | goto out; | ||
| 220 | |||
| 221 | err = -EPERM; | ||
| 222 | if (ovl_is_private_xattr(name)) | ||
| 223 | goto out_drop_write; | ||
| 224 | |||
| 225 | err = ovl_copy_up(dentry); | ||
| 226 | if (err) | ||
| 227 | goto out_drop_write; | ||
| 228 | |||
| 229 | upperdentry = ovl_dentry_upper(dentry); | ||
| 230 | err = vfs_setxattr(upperdentry, name, value, size, flags); | ||
| 231 | |||
| 232 | out_drop_write: | ||
| 233 | ovl_drop_write(dentry); | ||
| 234 | out: | ||
| 235 | return err; | ||
| 236 | } | ||
| 237 | |||
| 238 | static bool ovl_need_xattr_filter(struct dentry *dentry, | ||
| 239 | enum ovl_path_type type) | ||
| 240 | { | ||
| 241 | return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); | ||
| 242 | } | ||
| 243 | |||
| 244 | ssize_t ovl_getxattr(struct dentry *dentry, const char *name, | ||
| 245 | void *value, size_t size) | ||
| 246 | { | ||
| 247 | struct path realpath; | ||
| 248 | enum ovl_path_type type = ovl_path_real(dentry, &realpath); | ||
| 249 | |||
| 250 | if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) | ||
| 251 | return -ENODATA; | ||
| 252 | |||
| 253 | return vfs_getxattr(realpath.dentry, name, value, size); | ||
| 254 | } | ||
| 255 | |||
/*
 * ->listxattr: list the real dentry's xattrs, then compact the returned
 * NUL-separated name list in place to drop private overlay entries.
 */
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct path realpath;
	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
	ssize_t res;
	int off;

	res = vfs_listxattr(realpath.dentry, list, size);
	/* error, empty list, or size-probe call (size == 0): nothing to filter */
	if (res <= 0 || size == 0)
		return res;

	if (!ovl_need_xattr_filter(dentry, type))
		return res;

	/* filter out private xattrs */
	for (off = 0; off < res;) {
		char *s = list + off;
		size_t slen = strlen(s) + 1;	/* include the terminating NUL */

		BUG_ON(off + slen > res);

		if (ovl_is_private_xattr(s)) {
			/* shrink first so (res - off) is the remaining byte count */
			res -= slen;
			memmove(s, s + slen, res - off);
		} else {
			off += slen;
		}
	}

	return res;
}
| 287 | |||
/*
 * ->removexattr: remove an xattr from the upper copy.  A lower-only file
 * is first probed (so a missing attribute fails without a needless
 * copy-up) and then copied up before the removal.
 */
int ovl_removexattr(struct dentry *dentry, const char *name)
{
	int err;
	struct path realpath;
	enum ovl_path_type type = ovl_path_real(dentry, &realpath);

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	/* private overlay xattrs are hidden, so report them as absent */
	err = -ENODATA;
	if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
		goto out_drop_write;

	if (type == OVL_PATH_LOWER) {
		/* probe existence on the lower copy before copying up */
		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
		if (err < 0)
			goto out_drop_write;

		err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		/* now operate on the freshly created upper copy */
		ovl_path_upper(dentry, &realpath);
	}

	err = vfs_removexattr(realpath.dentry, name);
out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}
| 320 | |||
| 321 | static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, | ||
| 322 | struct dentry *realdentry) | ||
| 323 | { | ||
| 324 | if (type != OVL_PATH_LOWER) | ||
| 325 | return false; | ||
| 326 | |||
| 327 | if (special_file(realdentry->d_inode->i_mode)) | ||
| 328 | return false; | ||
| 329 | |||
| 330 | if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) | ||
| 331 | return false; | ||
| 332 | |||
| 333 | return true; | ||
| 334 | } | ||
| 335 | |||
/*
 * ->dentry_open: open the real file, copying a lower file up first when
 * the open is for writing.  O_TRUNC opens skip copying the data (size is
 * forced to zero) since it would be discarded anyway.
 */
static int ovl_dentry_open(struct dentry *dentry, struct file *file,
			   const struct cred *cred)
{
	int err;
	struct path realpath;
	enum ovl_path_type type;
	bool want_write = false;

	type = ovl_path_real(dentry, &realpath);
	if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
		want_write = true;
		err = ovl_want_write(dentry);
		if (err)
			goto out;

		if (file->f_flags & O_TRUNC)
			err = ovl_copy_up_last(dentry, NULL, true);
		else
			err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		/* open the new upper copy, not the stale lower path */
		ovl_path_upper(dentry, &realpath);
	}

	err = vfs_open(&realpath, file, cred);
out_drop_write:
	if (want_write)
		ovl_drop_write(dentry);
out:
	return err;
}
| 368 | |||
/* Inode operations for regular and special overlay files. */
static const struct inode_operations ovl_file_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.setxattr	= ovl_setxattr,
	.getxattr	= ovl_getxattr,
	.listxattr	= ovl_listxattr,
	.removexattr	= ovl_removexattr,
	.dentry_open	= ovl_dentry_open,
};
| 379 | |||
/* Inode operations for overlay symlinks (link following is delegated). */
static const struct inode_operations ovl_symlink_inode_operations = {
	.setattr	= ovl_setattr,
	.follow_link	= ovl_follow_link,
	.put_link	= ovl_put_link,
	.readlink	= ovl_readlink,
	.getattr	= ovl_getattr,
	.setxattr	= ovl_setxattr,
	.getxattr	= ovl_getxattr,
	.listxattr	= ovl_listxattr,
	.removexattr	= ovl_removexattr,
};
| 391 | |||
| 392 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, | ||
| 393 | struct ovl_entry *oe) | ||
| 394 | { | ||
| 395 | struct inode *inode; | ||
| 396 | |||
| 397 | inode = new_inode(sb); | ||
| 398 | if (!inode) | ||
| 399 | return NULL; | ||
| 400 | |||
| 401 | mode &= S_IFMT; | ||
| 402 | |||
| 403 | inode->i_ino = get_next_ino(); | ||
| 404 | inode->i_mode = mode; | ||
| 405 | inode->i_flags |= S_NOATIME | S_NOCMTIME; | ||
| 406 | |||
| 407 | switch (mode) { | ||
| 408 | case S_IFDIR: | ||
| 409 | inode->i_private = oe; | ||
| 410 | inode->i_op = &ovl_dir_inode_operations; | ||
| 411 | inode->i_fop = &ovl_dir_operations; | ||
| 412 | break; | ||
| 413 | |||
| 414 | case S_IFLNK: | ||
| 415 | inode->i_op = &ovl_symlink_inode_operations; | ||
| 416 | break; | ||
| 417 | |||
| 418 | case S_IFREG: | ||
| 419 | case S_IFSOCK: | ||
| 420 | case S_IFBLK: | ||
| 421 | case S_IFCHR: | ||
| 422 | case S_IFIFO: | ||
| 423 | inode->i_op = &ovl_file_inode_operations; | ||
| 424 | break; | ||
| 425 | |||
| 426 | default: | ||
| 427 | WARN(1, "illegal file type: %i\n", mode); | ||
| 428 | iput(inode); | ||
| 429 | inode = NULL; | ||
| 430 | } | ||
| 431 | |||
| 432 | return inode; | ||
| 433 | |||
| 434 | } | ||
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h new file mode 100644 index 000000000000..814bed33dd07 --- /dev/null +++ b/fs/overlayfs/overlayfs.h | |||
| @@ -0,0 +1,191 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/kernel.h> | ||
| 11 | |||
| 12 | struct ovl_entry; | ||
| 13 | |||
/*
 * How an overlay dentry is backed by the layers.
 * NOTE(review): meanings inferred from usage in dir.c/inode.c —
 * confirm against ovl_path_type()'s implementation.
 */
enum ovl_path_type {
	OVL_PATH_PURE_UPPER,	/* upper only; presumably nothing hidden below */
	OVL_PATH_UPPER,		/* upper backed, but not "pure" (e.g. copied up) */
	OVL_PATH_MERGE,		/* merged directory: upper + lower contents */
	OVL_PATH_LOWER,		/* lower layer only */
};
| 20 | |||
| 21 | extern const char *ovl_opaque_xattr; | ||
| 22 | |||
/* vfs_rmdir() wrapper that traces the call and its result. */
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int err;

	err = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, err);
	return err;
}
| 29 | |||
| 30 | static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry) | ||
| 31 | { | ||
| 32 | int err = vfs_unlink(dir, dentry, NULL); | ||
| 33 | pr_debug("unlink(%pd2) = %i\n", dentry, err); | ||
| 34 | return err; | ||
| 35 | } | ||
| 36 | |||
| 37 | static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir, | ||
| 38 | struct dentry *new_dentry, bool debug) | ||
| 39 | { | ||
| 40 | int err = vfs_link(old_dentry, dir, new_dentry, NULL); | ||
| 41 | if (debug) { | ||
| 42 | pr_debug("link(%pd2, %pd2) = %i\n", | ||
| 43 | old_dentry, new_dentry, err); | ||
| 44 | } | ||
| 45 | return err; | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline int ovl_do_create(struct inode *dir, struct dentry *dentry, | ||
| 49 | umode_t mode, bool debug) | ||
| 50 | { | ||
| 51 | int err = vfs_create(dir, dentry, mode, true); | ||
| 52 | if (debug) | ||
| 53 | pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); | ||
| 54 | return err; | ||
| 55 | } | ||
| 56 | |||
| 57 | static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry, | ||
| 58 | umode_t mode, bool debug) | ||
| 59 | { | ||
| 60 | int err = vfs_mkdir(dir, dentry, mode); | ||
| 61 | if (debug) | ||
| 62 | pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); | ||
| 63 | return err; | ||
| 64 | } | ||
| 65 | |||
| 66 | static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry, | ||
| 67 | umode_t mode, dev_t dev, bool debug) | ||
| 68 | { | ||
| 69 | int err = vfs_mknod(dir, dentry, mode, dev); | ||
| 70 | if (debug) { | ||
| 71 | pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", | ||
| 72 | dentry, mode, dev, err); | ||
| 73 | } | ||
| 74 | return err; | ||
| 75 | } | ||
| 76 | |||
| 77 | static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry, | ||
| 78 | const char *oldname, bool debug) | ||
| 79 | { | ||
| 80 | int err = vfs_symlink(dir, dentry, oldname); | ||
| 81 | if (debug) | ||
| 82 | pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); | ||
| 83 | return err; | ||
| 84 | } | ||
| 85 | |||
/*
 * vfs_setxattr() wrapper that always traces the call.
 * NOTE(review): the trace prints @value with "%*s" — only safe/meaningful
 * for text values; binary xattr values will print garbage up to @size.
 */
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	int err = vfs_setxattr(dentry, name, value, size, flags);
	pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
		 dentry, name, (int) size, (char *) value, flags, err);
	return err;
}
| 94 | |||
/* vfs_removexattr() wrapper that traces the call and its result. */
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int err;

	err = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
	return err;
}
| 101 | |||
| 102 | static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, | ||
| 103 | struct inode *newdir, struct dentry *newdentry, | ||
| 104 | unsigned int flags) | ||
| 105 | { | ||
| 106 | int err; | ||
| 107 | |||
| 108 | pr_debug("rename2(%pd2, %pd2, 0x%x)\n", | ||
| 109 | olddentry, newdentry, flags); | ||
| 110 | |||
| 111 | err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags); | ||
| 112 | |||
| 113 | if (err) { | ||
| 114 | pr_debug("...rename2(%pd2, %pd2, ...) = %i\n", | ||
| 115 | olddentry, newdentry, err); | ||
| 116 | } | ||
| 117 | return err; | ||
| 118 | } | ||
| 119 | |||
/* vfs_whiteout() wrapper that traces the call and its result. */
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int err;

	err = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, err);
	return err;
}
| 126 | |||
| 127 | enum ovl_path_type ovl_path_type(struct dentry *dentry); | ||
| 128 | u64 ovl_dentry_version_get(struct dentry *dentry); | ||
| 129 | void ovl_dentry_version_inc(struct dentry *dentry); | ||
| 130 | void ovl_path_upper(struct dentry *dentry, struct path *path); | ||
| 131 | void ovl_path_lower(struct dentry *dentry, struct path *path); | ||
| 132 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); | ||
| 133 | struct dentry *ovl_dentry_upper(struct dentry *dentry); | ||
| 134 | struct dentry *ovl_dentry_lower(struct dentry *dentry); | ||
| 135 | struct dentry *ovl_dentry_real(struct dentry *dentry); | ||
| 136 | struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); | ||
| 137 | struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); | ||
| 138 | void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); | ||
| 139 | struct dentry *ovl_workdir(struct dentry *dentry); | ||
| 140 | int ovl_want_write(struct dentry *dentry); | ||
| 141 | void ovl_drop_write(struct dentry *dentry); | ||
| 142 | bool ovl_dentry_is_opaque(struct dentry *dentry); | ||
| 143 | void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); | ||
| 144 | bool ovl_is_whiteout(struct dentry *dentry); | ||
| 145 | void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); | ||
| 146 | struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, | ||
| 147 | unsigned int flags); | ||
| 148 | struct file *ovl_path_open(struct path *path, int flags); | ||
| 149 | |||
| 150 | struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, | ||
| 151 | struct kstat *stat, const char *link); | ||
| 152 | |||
| 153 | /* readdir.c */ | ||
| 154 | extern const struct file_operations ovl_dir_operations; | ||
| 155 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); | ||
| 156 | void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); | ||
| 157 | void ovl_cache_free(struct list_head *list); | ||
| 158 | |||
| 159 | /* inode.c */ | ||
| 160 | int ovl_setattr(struct dentry *dentry, struct iattr *attr); | ||
| 161 | int ovl_permission(struct inode *inode, int mask); | ||
| 162 | int ovl_setxattr(struct dentry *dentry, const char *name, | ||
| 163 | const void *value, size_t size, int flags); | ||
| 164 | ssize_t ovl_getxattr(struct dentry *dentry, const char *name, | ||
| 165 | void *value, size_t size); | ||
| 166 | ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); | ||
| 167 | int ovl_removexattr(struct dentry *dentry, const char *name); | ||
| 168 | |||
| 169 | struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, | ||
| 170 | struct ovl_entry *oe); | ||
/* Copy ownership (uid/gid only) from the real inode to the overlay inode. */
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
	to->i_uid = from->i_uid;
	to->i_gid = from->i_gid;
}
| 176 | |||
| 177 | /* dir.c */ | ||
| 178 | extern const struct inode_operations ovl_dir_inode_operations; | ||
| 179 | struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); | ||
| 180 | int ovl_create_real(struct inode *dir, struct dentry *newdentry, | ||
| 181 | struct kstat *stat, const char *link, | ||
| 182 | struct dentry *hardlink, bool debug); | ||
| 183 | void ovl_cleanup(struct inode *dir, struct dentry *dentry); | ||
| 184 | |||
| 185 | /* copy_up.c */ | ||
| 186 | int ovl_copy_up(struct dentry *dentry); | ||
| 187 | int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, | ||
| 188 | struct path *lowerpath, struct kstat *stat, | ||
| 189 | struct iattr *attr); | ||
| 190 | int ovl_copy_xattr(struct dentry *old, struct dentry *new); | ||
| 191 | int ovl_set_attr(struct dentry *upper, struct kstat *stat); | ||
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c new file mode 100644 index 000000000000..ab1e3dcbed95 --- /dev/null +++ b/fs/overlayfs/readdir.c | |||
| @@ -0,0 +1,586 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/namei.h> | ||
| 13 | #include <linux/file.h> | ||
| 14 | #include <linux/xattr.h> | ||
| 15 | #include <linux/rbtree.h> | ||
| 16 | #include <linux/security.h> | ||
| 17 | #include <linux/cred.h> | ||
| 18 | #include "overlayfs.h" | ||
| 19 | |||
/* One directory entry in the merged-readdir cache. */
struct ovl_cache_entry {
	unsigned int len;	/* name length, excluding the NUL */
	unsigned int type;	/* d_type as reported by the real fs */
	u64 ino;		/* inode number from the real fs */
	struct list_head l_node;	/* position in ovl_dir_cache.entries */
	struct rb_node node;	/* dedup lookup by name (rb keyed on name) */
	bool is_whiteout;	/* entry is a whiteout: hide it */
	bool is_cursor;		/* fake entry marking a reader's position */
	char name[];		/* NUL-terminated name (flexible array) */
};
| 30 | |||
/* Cached, merged contents of a directory, shared between open readers. */
struct ovl_dir_cache {
	long refcount;		/* number of ovl_dir_file users */
	u64 version;		/* dentry version at fill time; stale if changed */
	struct list_head entries;	/* list of ovl_cache_entry */
};
| 36 | |||
/* Per-iteration state while filling the cache via iterate_dir(). */
struct ovl_readdir_data {
	struct dir_context ctx;	/* must be first: cast back from ctx */
	bool is_merge;		/* reading the upper half of a merged dir */
	struct rb_root root;	/* entries seen so far, keyed by name */
	struct list_head *list;	/* output list being filled */
	struct list_head middle;	/* insertion point for merge entries */
	int count;		/* number of entries added */
	int err;		/* first error encountered, if any */
};
| 46 | |||
/* Per-open-file state for an overlay directory. */
struct ovl_dir_file {
	bool is_real;		/* not a merge dir: delegate to the real dir */
	bool is_upper;		/* opened on the upper layer */
	struct ovl_dir_cache *cache;
	struct ovl_cache_entry cursor;	/* is_cursor entry tracking f_pos */
	struct file *realfile;
	struct file *upperfile;	/* lazily opened upper dir, for fsync */
};
| 55 | |||
/* Convert an rb-tree node back to its containing cache entry. */
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
{
	return container_of(n, struct ovl_cache_entry, node);
}
| 60 | |||
/*
 * Find a cache entry by (name, len) in the rb-tree, or return NULL.
 *
 * Ordering note: strncmp() over @len bytes plus the "len < p->len" tie
 * break sorts a strict prefix before the longer name.  cmp == 0 implies
 * len <= p->len, because comparing a longer @name against the shorter,
 * NUL-terminated p->name yields a positive result at p->name's NUL.
 */
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
						    const char *name, int len)
{
	struct rb_node *node = root->rb_node;
	int cmp;

	while (node) {
		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

		cmp = strncmp(name, p->name, len);
		if (cmp > 0)
			node = p->node.rb_right;
		else if (cmp < 0 || len < p->len)
			node = p->node.rb_left;
		else
			return p;
	}

	return NULL;
}
| 81 | |||
| 82 | static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, | ||
| 83 | u64 ino, unsigned int d_type) | ||
| 84 | { | ||
| 85 | struct ovl_cache_entry *p; | ||
| 86 | size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); | ||
| 87 | |||
| 88 | p = kmalloc(size, GFP_KERNEL); | ||
| 89 | if (p) { | ||
| 90 | memcpy(p->name, name, len); | ||
| 91 | p->name[len] = '\0'; | ||
| 92 | p->len = len; | ||
| 93 | p->type = d_type; | ||
| 94 | p->ino = ino; | ||
| 95 | p->is_whiteout = false; | ||
| 96 | p->is_cursor = false; | ||
| 97 | } | ||
| 98 | |||
| 99 | return p; | ||
| 100 | } | ||
| 101 | |||
/*
 * Upper pass: insert a new entry into the rb-tree and append it to the
 * output list.  A duplicate name is silently ignored (returns 0).
 * Returns 0 on success or -ENOMEM.
 */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len, u64 ino,
				  unsigned int d_type)
{
	struct rb_node **newp = &rdd->root.rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *p;

	while (*newp) {
		int cmp;
		struct ovl_cache_entry *tmp;

		parent = *newp;
		tmp = ovl_cache_entry_from_node(*newp);
		/* same ordering rule as ovl_cache_entry_find() */
		cmp = strncmp(name, tmp->name, len);
		if (cmp > 0)
			newp = &tmp->node.rb_right;
		else if (cmp < 0 || len < tmp->len)
			newp = &tmp->node.rb_left;
		else
			return 0;
	}

	p = ovl_cache_entry_new(name, len, ino, d_type);
	if (p == NULL)
		return -ENOMEM;

	list_add_tail(&p->l_node, rdd->list);
	rb_link_node(&p->node, parent, newp);
	rb_insert_color(&p->node, &rdd->root);

	return 0;
}
| 135 | |||
/*
 * Lower (merge) pass: if the name already exists in the cache, move
 * that entry onto the temporary "middle" list; otherwise create a new
 * entry there.  This places lower-layer entries before upper-only ones
 * in the final listing.  Returns rdd->err (0 or -ENOMEM).
 */
static int ovl_fill_lower(struct ovl_readdir_data *rdd,
			  const char *name, int namelen,
			  loff_t offset, u64 ino, unsigned int d_type)
{
	struct ovl_cache_entry *p;

	p = ovl_cache_entry_find(&rdd->root, name, namelen);
	if (p) {
		list_move_tail(&p->l_node, &rdd->middle);
	} else {
		p = ovl_cache_entry_new(name, namelen, ino, d_type);
		if (p == NULL)
			rdd->err = -ENOMEM;
		else
			list_add_tail(&p->l_node, &rdd->middle);
	}

	return rdd->err;
}
| 155 | |||
| 156 | void ovl_cache_free(struct list_head *list) | ||
| 157 | { | ||
| 158 | struct ovl_cache_entry *p; | ||
| 159 | struct ovl_cache_entry *n; | ||
| 160 | |||
| 161 | list_for_each_entry_safe(p, n, list, l_node) | ||
| 162 | kfree(p); | ||
| 163 | |||
| 164 | INIT_LIST_HEAD(list); | ||
| 165 | } | ||
| 166 | |||
/*
 * Drop one file's reference on the directory cache.  The file's cursor
 * is unlinked from the entry list first; on the last reference the
 * cache is detached from the dentry (if still attached) and freed.
 */
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
{
	struct ovl_dir_cache *cache = od->cache;

	list_del_init(&od->cursor.l_node);
	WARN_ON(cache->refcount <= 0);
	cache->refcount--;
	if (!cache->refcount) {
		if (ovl_dir_cache(dentry) == cache)
			ovl_set_dir_cache(dentry, NULL);

		ovl_cache_free(&cache->entries);
		kfree(cache);
	}
}
| 182 | |||
| 183 | static int ovl_fill_merge(void *buf, const char *name, int namelen, | ||
| 184 | loff_t offset, u64 ino, unsigned int d_type) | ||
| 185 | { | ||
| 186 | struct ovl_readdir_data *rdd = buf; | ||
| 187 | |||
| 188 | rdd->count++; | ||
| 189 | if (!rdd->is_merge) | ||
| 190 | return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); | ||
| 191 | else | ||
| 192 | return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); | ||
| 193 | } | ||
| 194 | |||
/*
 * Open the real directory at @realpath and feed its entries to
 * rdd->ctx.actor.  iterate_dir() is called repeatedly until a pass
 * produces no entries (rdd->count == 0), so the whole directory is
 * consumed.  Returns 0 or a negative errno, either from iterate_dir()
 * itself or propagated from the actor via rdd->err.
 */
static inline int ovl_dir_read(struct path *realpath,
			       struct ovl_readdir_data *rdd)
{
	struct file *realfile;
	int err;

	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	rdd->ctx.pos = 0;
	do {
		rdd->count = 0;
		rdd->err = 0;
		err = iterate_dir(realfile, &rdd->ctx);
		if (err >= 0)
			err = rdd->err;
	} while (!err && rdd->count);
	fput(realfile);

	return err;
}
| 217 | |||
/*
 * Rewind handling (f_pos == 0): discard a stale cache if the directory
 * version changed, and notice when a formerly real dir has become a
 * merge dir; the reverse transition is unexpected, hence the WARN_ON.
 */
static void ovl_dir_reset(struct file *file)
{
	struct ovl_dir_file *od = file->private_data;
	struct ovl_dir_cache *cache = od->cache;
	struct dentry *dentry = file->f_path.dentry;
	enum ovl_path_type type = ovl_path_type(dentry);

	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
		ovl_cache_put(od, dentry);
		od->cache = NULL;
	}
	WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
	if (od->is_real && type == OVL_PATH_MERGE)
		od->is_real = false;
}
| 233 | |||
/*
 * Walk the just-read upper entries and set is_whiteout on those whose
 * upper object is a whiteout.  Only DT_CHR entries can be whiteouts,
 * so all others are skipped without a lookup.  The lookup is done with
 * CAP_DAC_OVERRIDE raised in a temporary credential.  Returns 0 or
 * -ENOMEM (in which case the list has been freed).
 */
static int ovl_dir_mark_whiteouts(struct dentry *dir,
				  struct ovl_readdir_data *rdd)
{
	struct ovl_cache_entry *p;
	struct dentry *dentry;
	const struct cred *old_cred;
	struct cred *override_cred;

	override_cred = prepare_creds();
	if (!override_cred) {
		ovl_cache_free(rdd->list);
		return -ENOMEM;
	}

	/*
	 * CAP_DAC_OVERRIDE for lookup
	 */
	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
	old_cred = override_creds(override_cred);

	mutex_lock(&dir->d_inode->i_mutex);
	list_for_each_entry(p, rdd->list, l_node) {
		if (p->is_cursor)
			continue;

		if (p->type != DT_CHR)
			continue;

		dentry = lookup_one_len(p->name, dir, p->len);
		if (IS_ERR(dentry))
			continue;	/* lookup failure: leave unmarked */

		p->is_whiteout = ovl_is_whiteout(dentry);
		dput(dentry);
	}
	mutex_unlock(&dir->d_inode->i_mutex);

	revert_creds(old_cred);
	put_cred(override_cred);

	return 0;
}
| 276 | |||
| 277 | static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) | ||
| 278 | { | ||
| 279 | int err; | ||
| 280 | struct path lowerpath; | ||
| 281 | struct path upperpath; | ||
| 282 | struct ovl_readdir_data rdd = { | ||
| 283 | .ctx.actor = ovl_fill_merge, | ||
| 284 | .list = list, | ||
| 285 | .root = RB_ROOT, | ||
| 286 | .is_merge = false, | ||
| 287 | }; | ||
| 288 | |||
| 289 | ovl_path_lower(dentry, &lowerpath); | ||
| 290 | ovl_path_upper(dentry, &upperpath); | ||
| 291 | |||
| 292 | if (upperpath.dentry) { | ||
| 293 | err = ovl_dir_read(&upperpath, &rdd); | ||
| 294 | if (err) | ||
| 295 | goto out; | ||
| 296 | |||
| 297 | if (lowerpath.dentry) { | ||
| 298 | err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); | ||
| 299 | if (err) | ||
| 300 | goto out; | ||
| 301 | } | ||
| 302 | } | ||
| 303 | if (lowerpath.dentry) { | ||
| 304 | /* | ||
| 305 | * Insert lowerpath entries before upperpath ones, this allows | ||
| 306 | * offsets to be reasonably constant | ||
| 307 | */ | ||
| 308 | list_add(&rdd.middle, rdd.list); | ||
| 309 | rdd.is_merge = true; | ||
| 310 | err = ovl_dir_read(&lowerpath, &rdd); | ||
| 311 | list_del(&rdd.middle); | ||
| 312 | } | ||
| 313 | out: | ||
| 314 | return err; | ||
| 315 | } | ||
| 316 | |||
/*
 * Position the file's cursor so the next emitted entry is the pos'th
 * non-cursor entry of the cache.  Other files' cursors don't consume
 * an offset.  If pos is at or past the end, the loop terminates with
 * &p->l_node aliasing the list head, so list_move_tail() parks the
 * cursor at the very end of the list.
 */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct ovl_cache_entry *p;
	loff_t off = 0;

	list_for_each_entry(p, &od->cache->entries, l_node) {
		if (p->is_cursor)
			continue;
		if (off >= pos)
			break;
		off++;
	}
	list_move_tail(&od->cursor.l_node, &p->l_node);
}
| 331 | |||
/*
 * Get a referenced directory cache for @dentry: reuse the attached one
 * if its version still matches, otherwise build a fresh merged listing
 * and attach it.  Returns the cache or an ERR_PTR.  Caller must hold
 * i_mutex (required by ovl_dentry_version_get()).
 */
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
	int res;
	struct ovl_dir_cache *cache;

	cache = ovl_dir_cache(dentry);
	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
		cache->refcount++;
		return cache;
	}
	ovl_set_dir_cache(dentry, NULL);

	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
	if (!cache)
		return ERR_PTR(-ENOMEM);

	cache->refcount = 1;
	INIT_LIST_HEAD(&cache->entries);

	res = ovl_dir_read_merged(dentry, &cache->entries);
	if (res) {
		ovl_cache_free(&cache->entries);
		kfree(cache);
		return ERR_PTR(res);
	}

	cache->version = ovl_dentry_version_get(dentry);
	ovl_set_dir_cache(dentry, cache);

	return cache;
}
| 363 | |||
/*
 * ->iterate for overlay directories.
 *
 * Non-merge dirs delegate directly to the real underlying directory.
 * Merge dirs build (or reuse) the merged cache on first use, then emit
 * entries starting at this file's cursor: whiteouts are skipped without
 * emitting but still consume an offset, other files' cursors consume
 * nothing.  ctx->pos counts emitted/visible entries, not bytes.
 */
static int ovl_iterate(struct file *file, struct dir_context *ctx)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;

	if (!ctx->pos)
		ovl_dir_reset(file);

	if (od->is_real)
		return iterate_dir(od->realfile, ctx);

	if (!od->cache) {
		struct ovl_dir_cache *cache;

		cache = ovl_cache_get(dentry);
		if (IS_ERR(cache))
			return PTR_ERR(cache);

		od->cache = cache;
		ovl_seek_cursor(od, ctx->pos);
	}

	while (od->cursor.l_node.next != &od->cache->entries) {
		struct ovl_cache_entry *p;

		p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node);
		/* Skip cursors */
		if (!p->is_cursor) {
			if (!p->is_whiteout) {
				if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
					break;
			}
			ctx->pos++;
		}
		list_move(&od->cursor.l_node, &p->l_node);
	}
	return 0;
}
| 402 | |||
/*
 * ->llseek for overlay directories.  Real dirs forward to vfs_llseek()
 * and mirror the resulting position.  Merge dirs treat f_pos as a
 * logical entry index: only SEEK_SET/SEEK_CUR are supported, and the
 * cursor is repositioned within the cache if one exists.
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t res;
	struct ovl_dir_file *od = file->private_data;

	mutex_lock(&file_inode(file)->i_mutex);
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		res = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
	} else {
		res = -EINVAL;

		switch (origin) {
		case SEEK_CUR:
			offset += file->f_pos;
			break;
		case SEEK_SET:
			break;
		default:
			goto out_unlock;
		}
		if (offset < 0)
			goto out_unlock;

		if (offset != file->f_pos) {
			file->f_pos = offset;
			if (od->cache)
				ovl_seek_cursor(od, offset);
		}
		res = offset;
	}
out_unlock:
	mutex_unlock(&file_inode(file)->i_mutex);

	return res;
}
| 442 | |||
/*
 * ->fsync for overlay directories: sync the relevant real directory.
 *
 * A dir opened while on the lower layer may have been copied up since;
 * then the upper dir is opened lazily, exactly once, with a lockless
 * fast path (lockless_dereference of od->upperfile) and an i_mutex
 * protected slow path that resolves the open race.
 */
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *realfile = od->realfile;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) {
		struct inode *inode = file_inode(file);

		realfile = lockless_dereference(od->upperfile);
		if (!realfile) {
			struct path upperpath;

			ovl_path_upper(dentry, &upperpath);
			realfile = ovl_path_open(&upperpath, O_RDONLY);
			/*
			 * NOTE(review): barrier orders the open's stores
			 * before publishing od->upperfile below — presumably
			 * pairing with the lockless_dereference() above;
			 * confirm against memory-barriers.txt.
			 */
			smp_mb__before_spinlock();
			mutex_lock(&inode->i_mutex);
			if (!od->upperfile) {
				if (IS_ERR(realfile)) {
					mutex_unlock(&inode->i_mutex);
					return PTR_ERR(realfile);
				}
				od->upperfile = realfile;
			} else {
				/* somebody has beaten us to it */
				if (!IS_ERR(realfile))
					fput(realfile);
				realfile = od->upperfile;
			}
			mutex_unlock(&inode->i_mutex);
		}
	}

	return vfs_fsync_range(realfile, start, end, datasync);
}
| 482 | |||
/*
 * ->release: drop our cache reference under i_mutex, close the real
 * file(s) and free the per-open-file state.
 */
static int ovl_dir_release(struct inode *inode, struct file *file)
{
	struct ovl_dir_file *od = file->private_data;

	if (od->cache) {
		mutex_lock(&inode->i_mutex);
		ovl_cache_put(od, file->f_path.dentry);
		mutex_unlock(&inode->i_mutex);
	}
	fput(od->realfile);
	if (od->upperfile)
		fput(od->upperfile);
	kfree(od);

	return 0;
}
| 499 | |||
/*
 * ->open: open the real (upper or lower) directory, record whether the
 * overlay dir is merged, and initialize the embedded cursor entry used
 * by ovl_iterate().
 */
static int ovl_dir_open(struct inode *inode, struct file *file)
{
	struct path realpath;
	struct file *realfile;
	struct ovl_dir_file *od;
	enum ovl_path_type type;

	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
	if (!od)
		return -ENOMEM;

	type = ovl_path_real(file->f_path.dentry, &realpath);
	realfile = ovl_path_open(&realpath, file->f_flags);
	if (IS_ERR(realfile)) {
		kfree(od);
		return PTR_ERR(realfile);
	}
	INIT_LIST_HEAD(&od->cursor.l_node);
	od->realfile = realfile;
	od->is_real = (type != OVL_PATH_MERGE);
	od->is_upper = (type != OVL_PATH_LOWER);
	od->cursor.is_cursor = true;
	file->private_data = od;

	return 0;
}
| 526 | |||
/* File operations for overlay directories. */
const struct file_operations ovl_dir_operations = {
	.read		= generic_read_dir,
	.open		= ovl_dir_open,
	.iterate	= ovl_iterate,
	.llseek		= ovl_dir_llseek,
	.fsync		= ovl_dir_fsync,
	.release	= ovl_dir_release,
};
| 535 | |||
| 536 | int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) | ||
| 537 | { | ||
| 538 | int err; | ||
| 539 | struct ovl_cache_entry *p; | ||
| 540 | |||
| 541 | err = ovl_dir_read_merged(dentry, list); | ||
| 542 | if (err) | ||
| 543 | return err; | ||
| 544 | |||
| 545 | err = 0; | ||
| 546 | |||
| 547 | list_for_each_entry(p, list, l_node) { | ||
| 548 | if (p->is_whiteout) | ||
| 549 | continue; | ||
| 550 | |||
| 551 | if (p->name[0] == '.') { | ||
| 552 | if (p->len == 1) | ||
| 553 | continue; | ||
| 554 | if (p->len == 2 && p->name[1] == '.') | ||
| 555 | continue; | ||
| 556 | } | ||
| 557 | err = -ENOTEMPTY; | ||
| 558 | break; | ||
| 559 | } | ||
| 560 | |||
| 561 | return err; | ||
| 562 | } | ||
| 563 | |||
/*
 * Remove from the real upper directory @upper every entry in @list that
 * is marked is_whiteout.  Lookup failures are logged and skipped so the
 * remaining whiteouts are still cleaned up.
 */
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
{
	struct ovl_cache_entry *p;

	mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
	list_for_each_entry(p, list, l_node) {
		struct dentry *dentry;

		if (!p->is_whiteout)
			continue;

		dentry = lookup_one_len(p->name, upper, p->len);
		if (IS_ERR(dentry)) {
			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
			       upper->d_name.name, p->len, p->name,
			       (int) PTR_ERR(dentry));
			continue;
		}
		ovl_cleanup(upper->d_inode, dentry);
		dput(dentry);
	}
	mutex_unlock(&upper->d_inode->i_mutex);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c new file mode 100644 index 000000000000..f16d318b71f8 --- /dev/null +++ b/fs/overlayfs/super.c | |||
| @@ -0,0 +1,833 @@ | |||
| 1 | /* | ||
| 2 | * | ||
| 3 | * Copyright (C) 2011 Novell Inc. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms of the GNU General Public License version 2 as published by | ||
| 7 | * the Free Software Foundation. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/fs.h> | ||
| 11 | #include <linux/namei.h> | ||
| 12 | #include <linux/xattr.h> | ||
| 13 | #include <linux/security.h> | ||
| 14 | #include <linux/mount.h> | ||
| 15 | #include <linux/slab.h> | ||
| 16 | #include <linux/parser.h> | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/statfs.h> | ||
| 20 | #include <linux/seq_file.h> | ||
| 21 | #include "overlayfs.h" | ||
| 22 | |||
| 23 | MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); | ||
| 24 | MODULE_DESCRIPTION("Overlay filesystem"); | ||
| 25 | MODULE_LICENSE("GPL"); | ||
| 26 | |||
| 27 | #define OVERLAYFS_SUPER_MAGIC 0x794c7630 | ||
| 28 | |||
/* Mount option strings, kept verbatim for ovl_show_options(). */
struct ovl_config {
	char *lowerdir;
	char *upperdir;
	char *workdir;
};
| 34 | |||
/* private information held for overlayfs's superblock */
struct ovl_fs {
	struct vfsmount *upper_mnt;
	struct vfsmount *lower_mnt;
	struct dentry *workdir;
	long lower_namelen;	/* lower fs f_namelen, merged into statfs */
	/* pathnames of lower and upper dirs, for show_options */
	struct ovl_config config;
};
| 44 | |||
| 45 | struct ovl_dir_cache; | ||
| 46 | |||
/* private information held for every overlayfs dentry */
struct ovl_entry {
	struct dentry *__upperdentry;	/* read via ovl_upperdentry_dereference() */
	struct dentry *lowerdentry;
	struct ovl_dir_cache *cache;	/* merged readdir cache, for dirs */
	union {
		struct {
			u64 version;	/* bumped on directory modification */
			bool opaque;	/* upper hides lower content */
		};
		struct rcu_head rcu;	/* for kfree_rcu() in d_release */
	};
};
| 60 | |||
/* xattr marking an upper directory as opaque (value 'y' hides lower content) */
const char *ovl_opaque_xattr = "trusted.overlay.opaque";
| 62 | |||
| 63 | |||
| 64 | enum ovl_path_type ovl_path_type(struct dentry *dentry) | ||
| 65 | { | ||
| 66 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 67 | |||
| 68 | if (oe->__upperdentry) { | ||
| 69 | if (oe->lowerdentry) { | ||
| 70 | if (S_ISDIR(dentry->d_inode->i_mode)) | ||
| 71 | return OVL_PATH_MERGE; | ||
| 72 | else | ||
| 73 | return OVL_PATH_UPPER; | ||
| 74 | } else { | ||
| 75 | if (oe->opaque) | ||
| 76 | return OVL_PATH_UPPER; | ||
| 77 | else | ||
| 78 | return OVL_PATH_PURE_UPPER; | ||
| 79 | } | ||
| 80 | } else { | ||
| 81 | return OVL_PATH_LOWER; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
/*
 * Lockless read of the upper dentry; pairs with the smp_wmb() in
 * ovl_dentry_update() so a non-NULL result is fully initialized.
 */
static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
{
	return lockless_dereference(oe->__upperdentry);
}
| 89 | |||
/* Fill @path with the upper layer mount/dentry; dentry may be NULL. */
void ovl_path_upper(struct dentry *dentry, struct path *path)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct ovl_entry *oe = dentry->d_fsdata;

	path->mnt = ofs->upper_mnt;
	path->dentry = ovl_upperdentry_dereference(oe);
}
| 98 | |||
| 99 | enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) | ||
| 100 | { | ||
| 101 | |||
| 102 | enum ovl_path_type type = ovl_path_type(dentry); | ||
| 103 | |||
| 104 | if (type == OVL_PATH_LOWER) | ||
| 105 | ovl_path_lower(dentry, path); | ||
| 106 | else | ||
| 107 | ovl_path_upper(dentry, path); | ||
| 108 | |||
| 109 | return type; | ||
| 110 | } | ||
| 111 | |||
| 112 | struct dentry *ovl_dentry_upper(struct dentry *dentry) | ||
| 113 | { | ||
| 114 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 115 | |||
| 116 | return ovl_upperdentry_dereference(oe); | ||
| 117 | } | ||
| 118 | |||
| 119 | struct dentry *ovl_dentry_lower(struct dentry *dentry) | ||
| 120 | { | ||
| 121 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 122 | |||
| 123 | return oe->lowerdentry; | ||
| 124 | } | ||
| 125 | |||
| 126 | struct dentry *ovl_dentry_real(struct dentry *dentry) | ||
| 127 | { | ||
| 128 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 129 | struct dentry *realdentry; | ||
| 130 | |||
| 131 | realdentry = ovl_upperdentry_dereference(oe); | ||
| 132 | if (!realdentry) | ||
| 133 | realdentry = oe->lowerdentry; | ||
| 134 | |||
| 135 | return realdentry; | ||
| 136 | } | ||
| 137 | |||
| 138 | struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) | ||
| 139 | { | ||
| 140 | struct dentry *realdentry; | ||
| 141 | |||
| 142 | realdentry = ovl_upperdentry_dereference(oe); | ||
| 143 | if (realdentry) { | ||
| 144 | *is_upper = true; | ||
| 145 | } else { | ||
| 146 | realdentry = oe->lowerdentry; | ||
| 147 | *is_upper = false; | ||
| 148 | } | ||
| 149 | return realdentry; | ||
| 150 | } | ||
| 151 | |||
| 152 | struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) | ||
| 153 | { | ||
| 154 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 155 | |||
| 156 | return oe->cache; | ||
| 157 | } | ||
| 158 | |||
| 159 | void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) | ||
| 160 | { | ||
| 161 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 162 | |||
| 163 | oe->cache = cache; | ||
| 164 | } | ||
| 165 | |||
/* Fill @path with the lower layer mount/dentry; dentry may be NULL. */
void ovl_path_lower(struct dentry *dentry, struct path *path)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct ovl_entry *oe = dentry->d_fsdata;

	path->mnt = ofs->lower_mnt;
	path->dentry = oe->lowerdentry;
}
| 174 | |||
| 175 | int ovl_want_write(struct dentry *dentry) | ||
| 176 | { | ||
| 177 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 178 | return mnt_want_write(ofs->upper_mnt); | ||
| 179 | } | ||
| 180 | |||
| 181 | void ovl_drop_write(struct dentry *dentry) | ||
| 182 | { | ||
| 183 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 184 | mnt_drop_write(ofs->upper_mnt); | ||
| 185 | } | ||
| 186 | |||
| 187 | struct dentry *ovl_workdir(struct dentry *dentry) | ||
| 188 | { | ||
| 189 | struct ovl_fs *ofs = dentry->d_sb->s_fs_info; | ||
| 190 | return ofs->workdir; | ||
| 191 | } | ||
| 192 | |||
| 193 | bool ovl_dentry_is_opaque(struct dentry *dentry) | ||
| 194 | { | ||
| 195 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 196 | return oe->opaque; | ||
| 197 | } | ||
| 198 | |||
| 199 | void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) | ||
| 200 | { | ||
| 201 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 202 | oe->opaque = opaque; | ||
| 203 | } | ||
| 204 | |||
/*
 * Publish @upperdentry as this dentry's upper layer (presumably after
 * copy-up — the callers are outside this file).  Caller must hold the
 * upper parent's i_mutex and the dentry must not already have an upper.
 * The smp_wmb() pairs with the lockless read side in
 * ovl_upperdentry_dereference().
 */
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
	WARN_ON(oe->__upperdentry);
	BUG_ON(!upperdentry->d_inode);
	/*
	 * Make sure upperdentry is consistent before making it visible to
	 * ovl_upperdentry_dereference().
	 */
	smp_wmb();
	oe->__upperdentry = upperdentry;
}
| 219 | |||
| 220 | void ovl_dentry_version_inc(struct dentry *dentry) | ||
| 221 | { | ||
| 222 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 223 | |||
| 224 | WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | ||
| 225 | oe->version++; | ||
| 226 | } | ||
| 227 | |||
| 228 | u64 ovl_dentry_version_get(struct dentry *dentry) | ||
| 229 | { | ||
| 230 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 231 | |||
| 232 | WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | ||
| 233 | return oe->version; | ||
| 234 | } | ||
| 235 | |||
| 236 | bool ovl_is_whiteout(struct dentry *dentry) | ||
| 237 | { | ||
| 238 | struct inode *inode = dentry->d_inode; | ||
| 239 | |||
| 240 | return inode && IS_WHITEOUT(inode); | ||
| 241 | } | ||
| 242 | |||
| 243 | static bool ovl_is_opaquedir(struct dentry *dentry) | ||
| 244 | { | ||
| 245 | int res; | ||
| 246 | char val; | ||
| 247 | struct inode *inode = dentry->d_inode; | ||
| 248 | |||
| 249 | if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) | ||
| 250 | return false; | ||
| 251 | |||
| 252 | res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); | ||
| 253 | if (res == 1 && val == 'y') | ||
| 254 | return true; | ||
| 255 | |||
| 256 | return false; | ||
| 257 | } | ||
| 258 | |||
| 259 | static void ovl_dentry_release(struct dentry *dentry) | ||
| 260 | { | ||
| 261 | struct ovl_entry *oe = dentry->d_fsdata; | ||
| 262 | |||
| 263 | if (oe) { | ||
| 264 | dput(oe->__upperdentry); | ||
| 265 | dput(oe->lowerdentry); | ||
| 266 | kfree_rcu(oe, rcu); | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
/* Dentry operations: free our per-dentry state when the dentry dies. */
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
};
| 273 | |||
| 274 | static struct ovl_entry *ovl_alloc_entry(void) | ||
| 275 | { | ||
| 276 | return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); | ||
| 277 | } | ||
| 278 | |||
/*
 * Look up @name in the real directory @dir (taking its i_mutex).
 * Returns a positive dentry, NULL if the name does not exist (both
 * -ENOENT and a negative dentry map to NULL), or an ERR_PTR for other
 * errors.
 */
static inline struct dentry *ovl_lookup_real(struct dentry *dir,
					     struct qstr *name)
{
	struct dentry *dentry;

	mutex_lock(&dir->d_inode->i_mutex);
	dentry = lookup_one_len(name->name, dir, name->len);
	mutex_unlock(&dir->d_inode->i_mutex);

	if (IS_ERR(dentry)) {
		if (PTR_ERR(dentry) == -ENOENT)
			dentry = NULL;
	} else if (!dentry->d_inode) {
		dput(dentry);
		dentry = NULL;
	}
	return dentry;
}
| 297 | |||
/*
 * ->lookup: find @dentry in the parent's upper and lower layers and
 * build its ovl_entry.
 *
 * The upper layer is consulted first: a whiteout there hides the lower
 * name, and an opaque upper directory hides lower content (recorded in
 * oe->opaque).  The lower layer is searched only when not hidden.  A
 * non-directory on either layer prevents merging — the upper object
 * wins.  Returns NULL on success (dentry instantiated, possibly
 * negative) or an ERR_PTR.
 */
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
{
	struct ovl_entry *oe;
	struct dentry *upperdir;
	struct dentry *lowerdir;
	struct dentry *upperdentry = NULL;
	struct dentry *lowerdentry = NULL;
	struct inode *inode = NULL;
	int err;

	err = -ENOMEM;
	oe = ovl_alloc_entry();
	if (!oe)
		goto out;

	upperdir = ovl_dentry_upper(dentry->d_parent);
	lowerdir = ovl_dentry_lower(dentry->d_parent);

	if (upperdir) {
		upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
		err = PTR_ERR(upperdentry);
		if (IS_ERR(upperdentry))
			goto out_put_dir;

		if (lowerdir && upperdentry) {
			if (ovl_is_whiteout(upperdentry)) {
				/* whiteout: name doesn't exist, hide lower */
				dput(upperdentry);
				upperdentry = NULL;
				oe->opaque = true;
			} else if (ovl_is_opaquedir(upperdentry)) {
				oe->opaque = true;
			}
		}
	}
	if (lowerdir && !oe->opaque) {
		lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
		err = PTR_ERR(lowerdentry);
		if (IS_ERR(lowerdentry))
			goto out_dput_upper;
	}

	/* only two directories can be merged; otherwise the upper wins */
	if (lowerdentry && upperdentry &&
	    (!S_ISDIR(upperdentry->d_inode->i_mode) ||
	     !S_ISDIR(lowerdentry->d_inode->i_mode))) {
		dput(lowerdentry);
		lowerdentry = NULL;
		oe->opaque = true;
	}

	if (lowerdentry || upperdentry) {
		struct dentry *realdentry;

		realdentry = upperdentry ? upperdentry : lowerdentry;
		err = -ENOMEM;
		inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
				      oe);
		if (!inode)
			goto out_dput;
		ovl_copyattr(realdentry->d_inode, inode);
	}

	oe->__upperdentry = upperdentry;
	oe->lowerdentry = lowerdentry;

	dentry->d_fsdata = oe;
	d_add(dentry, inode);

	return NULL;

out_dput:
	dput(lowerdentry);
out_dput_upper:
	dput(upperdentry);
out_put_dir:
	kfree(oe);
out:
	return ERR_PTR(err);
}
| 377 | |||
/* Open the real object at @path with the current credentials. */
struct file *ovl_path_open(struct path *path, int flags)
{
	return dentry_open(path, flags, current_cred());
}
| 382 | |||
/*
 * Tear down the overlay superblock: drop the workdir reference, the
 * layer mounts, and the option strings kept for show_options.
 */
static void ovl_put_super(struct super_block *sb)
{
	struct ovl_fs *ufs = sb->s_fs_info;

	dput(ufs->workdir);
	mntput(ufs->upper_mnt);
	mntput(ufs->lower_mnt);

	kfree(ufs->config.lowerdir);
	kfree(ufs->config.upperdir);
	kfree(ufs->config.workdir);
	kfree(ufs);
}
| 396 | |||
/**
 * ovl_statfs
 * @sb: The overlayfs super block
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics. As writes always target the upper layer
 * filesystem pass the statfs to the same filesystem.
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

	ovl_path_upper(root_dentry, &path);

	err = vfs_statfs(&path, buf);
	if (!err) {
		/* advertise the larger name limit of the two layers */
		buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}
| 422 | |||
/**
 * ovl_show_options
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 *
 * NOTE(review): the stored paths are printed verbatim; a ',' or other
 * special character in lowerdir/upperdir/workdir would corrupt the
 * /proc/mounts output.  Consider escaping (cf. seq_escape()) — confirm
 * against how mount option values are conventionally quoted.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
	struct ovl_fs *ufs = sb->s_fs_info;

	seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
	seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
	seq_printf(m, ",workdir=%s", ufs->config.workdir);
	return 0;
}
| 439 | |||
| 440 | static const struct super_operations ovl_super_operations = { | ||
| 441 | .put_super = ovl_put_super, | ||
| 442 | .statfs = ovl_statfs, | ||
| 443 | .show_options = ovl_show_options, | ||
| 444 | }; | ||
| 445 | |||
| 446 | enum { | ||
| 447 | OPT_LOWERDIR, | ||
| 448 | OPT_UPPERDIR, | ||
| 449 | OPT_WORKDIR, | ||
| 450 | OPT_ERR, | ||
| 451 | }; | ||
| 452 | |||
| 453 | static const match_table_t ovl_tokens = { | ||
| 454 | {OPT_LOWERDIR, "lowerdir=%s"}, | ||
| 455 | {OPT_UPPERDIR, "upperdir=%s"}, | ||
| 456 | {OPT_WORKDIR, "workdir=%s"}, | ||
| 457 | {OPT_ERR, NULL} | ||
| 458 | }; | ||
| 459 | |||
| 460 | static char *ovl_next_opt(char **s) | ||
| 461 | { | ||
| 462 | char *sbegin = *s; | ||
| 463 | char *p; | ||
| 464 | |||
| 465 | if (sbegin == NULL) | ||
| 466 | return NULL; | ||
| 467 | |||
| 468 | for (p = sbegin; *p; p++) { | ||
| 469 | if (*p == '\\') { | ||
| 470 | p++; | ||
| 471 | if (!*p) | ||
| 472 | break; | ||
| 473 | } else if (*p == ',') { | ||
| 474 | *p = '\0'; | ||
| 475 | *s = p + 1; | ||
| 476 | return sbegin; | ||
| 477 | } | ||
| 478 | } | ||
| 479 | *s = NULL; | ||
| 480 | return sbegin; | ||
| 481 | } | ||
| 482 | |||
| 483 | static int ovl_parse_opt(char *opt, struct ovl_config *config) | ||
| 484 | { | ||
| 485 | char *p; | ||
| 486 | |||
| 487 | while ((p = ovl_next_opt(&opt)) != NULL) { | ||
| 488 | int token; | ||
| 489 | substring_t args[MAX_OPT_ARGS]; | ||
| 490 | |||
| 491 | if (!*p) | ||
| 492 | continue; | ||
| 493 | |||
| 494 | token = match_token(p, ovl_tokens, args); | ||
| 495 | switch (token) { | ||
| 496 | case OPT_UPPERDIR: | ||
| 497 | kfree(config->upperdir); | ||
| 498 | config->upperdir = match_strdup(&args[0]); | ||
| 499 | if (!config->upperdir) | ||
| 500 | return -ENOMEM; | ||
| 501 | break; | ||
| 502 | |||
| 503 | case OPT_LOWERDIR: | ||
| 504 | kfree(config->lowerdir); | ||
| 505 | config->lowerdir = match_strdup(&args[0]); | ||
| 506 | if (!config->lowerdir) | ||
| 507 | return -ENOMEM; | ||
| 508 | break; | ||
| 509 | |||
| 510 | case OPT_WORKDIR: | ||
| 511 | kfree(config->workdir); | ||
| 512 | config->workdir = match_strdup(&args[0]); | ||
| 513 | if (!config->workdir) | ||
| 514 | return -ENOMEM; | ||
| 515 | break; | ||
| 516 | |||
| 517 | default: | ||
| 518 | return -EINVAL; | ||
| 519 | } | ||
| 520 | } | ||
| 521 | return 0; | ||
| 522 | } | ||
| 523 | |||
| 524 | #define OVL_WORKDIR_NAME "work" | ||
| 525 | |||
| 526 | static struct dentry *ovl_workdir_create(struct vfsmount *mnt, | ||
| 527 | struct dentry *dentry) | ||
| 528 | { | ||
| 529 | struct inode *dir = dentry->d_inode; | ||
| 530 | struct dentry *work; | ||
| 531 | int err; | ||
| 532 | bool retried = false; | ||
| 533 | |||
| 534 | err = mnt_want_write(mnt); | ||
| 535 | if (err) | ||
| 536 | return ERR_PTR(err); | ||
| 537 | |||
| 538 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
| 539 | retry: | ||
| 540 | work = lookup_one_len(OVL_WORKDIR_NAME, dentry, | ||
| 541 | strlen(OVL_WORKDIR_NAME)); | ||
| 542 | |||
| 543 | if (!IS_ERR(work)) { | ||
| 544 | struct kstat stat = { | ||
| 545 | .mode = S_IFDIR | 0, | ||
| 546 | }; | ||
| 547 | |||
| 548 | if (work->d_inode) { | ||
| 549 | err = -EEXIST; | ||
| 550 | if (retried) | ||
| 551 | goto out_dput; | ||
| 552 | |||
| 553 | retried = true; | ||
| 554 | ovl_cleanup(dir, work); | ||
| 555 | dput(work); | ||
| 556 | goto retry; | ||
| 557 | } | ||
| 558 | |||
| 559 | err = ovl_create_real(dir, work, &stat, NULL, NULL, true); | ||
| 560 | if (err) | ||
| 561 | goto out_dput; | ||
| 562 | } | ||
| 563 | out_unlock: | ||
| 564 | mutex_unlock(&dir->i_mutex); | ||
| 565 | mnt_drop_write(mnt); | ||
| 566 | |||
| 567 | return work; | ||
| 568 | |||
| 569 | out_dput: | ||
| 570 | dput(work); | ||
| 571 | work = ERR_PTR(err); | ||
| 572 | goto out_unlock; | ||
| 573 | } | ||
| 574 | |||
| 575 | static void ovl_unescape(char *s) | ||
| 576 | { | ||
| 577 | char *d = s; | ||
| 578 | |||
| 579 | for (;; s++, d++) { | ||
| 580 | if (*s == '\\') | ||
| 581 | s++; | ||
| 582 | *d = *s; | ||
| 583 | if (!*s) | ||
| 584 | break; | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 588 | static int ovl_mount_dir(const char *name, struct path *path) | ||
| 589 | { | ||
| 590 | int err; | ||
| 591 | char *tmp = kstrdup(name, GFP_KERNEL); | ||
| 592 | |||
| 593 | if (!tmp) | ||
| 594 | return -ENOMEM; | ||
| 595 | |||
| 596 | ovl_unescape(tmp); | ||
| 597 | err = kern_path(tmp, LOOKUP_FOLLOW, path); | ||
| 598 | if (err) { | ||
| 599 | pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); | ||
| 600 | err = -EINVAL; | ||
| 601 | } | ||
| 602 | kfree(tmp); | ||
| 603 | return err; | ||
| 604 | } | ||
| 605 | |||
| 606 | static bool ovl_is_allowed_fs_type(struct dentry *root) | ||
| 607 | { | ||
| 608 | const struct dentry_operations *dop = root->d_op; | ||
| 609 | |||
| 610 | /* | ||
| 611 | * We don't support: | ||
| 612 | * - automount filesystems | ||
| 613 | * - filesystems with revalidate (FIXME for lower layer) | ||
| 614 | * - filesystems with case insensitive names | ||
| 615 | */ | ||
| 616 | if (dop && | ||
| 617 | (dop->d_manage || dop->d_automount || | ||
| 618 | dop->d_revalidate || dop->d_weak_revalidate || | ||
| 619 | dop->d_compare || dop->d_hash)) { | ||
| 620 | return false; | ||
| 621 | } | ||
| 622 | return true; | ||
| 623 | } | ||
| 624 | |||
| 625 | /* Workdir should not be subdir of upperdir and vice versa */ | ||
| 626 | static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) | ||
| 627 | { | ||
| 628 | bool ok = false; | ||
| 629 | |||
| 630 | if (workdir != upperdir) { | ||
| 631 | ok = (lock_rename(workdir, upperdir) == NULL); | ||
| 632 | unlock_rename(workdir, upperdir); | ||
| 633 | } | ||
| 634 | return ok; | ||
| 635 | } | ||
| 636 | |||
| 637 | static int ovl_fill_super(struct super_block *sb, void *data, int silent) | ||
| 638 | { | ||
| 639 | struct path lowerpath; | ||
| 640 | struct path upperpath; | ||
| 641 | struct path workpath; | ||
| 642 | struct inode *root_inode; | ||
| 643 | struct dentry *root_dentry; | ||
| 644 | struct ovl_entry *oe; | ||
| 645 | struct ovl_fs *ufs; | ||
| 646 | struct kstatfs statfs; | ||
| 647 | int err; | ||
| 648 | |||
| 649 | err = -ENOMEM; | ||
| 650 | ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); | ||
| 651 | if (!ufs) | ||
| 652 | goto out; | ||
| 653 | |||
| 654 | err = ovl_parse_opt((char *) data, &ufs->config); | ||
| 655 | if (err) | ||
| 656 | goto out_free_config; | ||
| 657 | |||
| 658 | /* FIXME: workdir is not needed for a R/O mount */ | ||
| 659 | err = -EINVAL; | ||
| 660 | if (!ufs->config.upperdir || !ufs->config.lowerdir || | ||
| 661 | !ufs->config.workdir) { | ||
| 662 | pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); | ||
| 663 | goto out_free_config; | ||
| 664 | } | ||
| 665 | |||
| 666 | err = -ENOMEM; | ||
| 667 | oe = ovl_alloc_entry(); | ||
| 668 | if (oe == NULL) | ||
| 669 | goto out_free_config; | ||
| 670 | |||
| 671 | err = ovl_mount_dir(ufs->config.upperdir, &upperpath); | ||
| 672 | if (err) | ||
| 673 | goto out_free_oe; | ||
| 674 | |||
| 675 | err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); | ||
| 676 | if (err) | ||
| 677 | goto out_put_upperpath; | ||
| 678 | |||
| 679 | err = ovl_mount_dir(ufs->config.workdir, &workpath); | ||
| 680 | if (err) | ||
| 681 | goto out_put_lowerpath; | ||
| 682 | |||
| 683 | err = -EINVAL; | ||
| 684 | if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || | ||
| 685 | !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || | ||
| 686 | !S_ISDIR(workpath.dentry->d_inode->i_mode)) { | ||
| 687 | pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); | ||
| 688 | goto out_put_workpath; | ||
| 689 | } | ||
| 690 | |||
| 691 | if (upperpath.mnt != workpath.mnt) { | ||
| 692 | pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); | ||
| 693 | goto out_put_workpath; | ||
| 694 | } | ||
| 695 | if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { | ||
| 696 | pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); | ||
| 697 | goto out_put_workpath; | ||
| 698 | } | ||
| 699 | |||
| 700 | if (!ovl_is_allowed_fs_type(upperpath.dentry)) { | ||
| 701 | pr_err("overlayfs: filesystem of upperdir is not supported\n"); | ||
| 702 | goto out_put_workpath; | ||
| 703 | } | ||
| 704 | |||
| 705 | if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { | ||
| 706 | pr_err("overlayfs: filesystem of lowerdir is not supported\n"); | ||
| 707 | goto out_put_workpath; | ||
| 708 | } | ||
| 709 | |||
| 710 | err = vfs_statfs(&lowerpath, &statfs); | ||
| 711 | if (err) { | ||
| 712 | pr_err("overlayfs: statfs failed on lowerpath\n"); | ||
| 713 | goto out_put_workpath; | ||
| 714 | } | ||
| 715 | ufs->lower_namelen = statfs.f_namelen; | ||
| 716 | |||
| 717 | sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, | ||
| 718 | lowerpath.mnt->mnt_sb->s_stack_depth) + 1; | ||
| 719 | |||
| 720 | err = -EINVAL; | ||
| 721 | if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { | ||
| 722 | pr_err("overlayfs: maximum fs stacking depth exceeded\n"); | ||
| 723 | goto out_put_workpath; | ||
| 724 | } | ||
| 725 | |||
| 726 | ufs->upper_mnt = clone_private_mount(&upperpath); | ||
| 727 | err = PTR_ERR(ufs->upper_mnt); | ||
| 728 | if (IS_ERR(ufs->upper_mnt)) { | ||
| 729 | pr_err("overlayfs: failed to clone upperpath\n"); | ||
| 730 | goto out_put_workpath; | ||
| 731 | } | ||
| 732 | |||
| 733 | ufs->lower_mnt = clone_private_mount(&lowerpath); | ||
| 734 | err = PTR_ERR(ufs->lower_mnt); | ||
| 735 | if (IS_ERR(ufs->lower_mnt)) { | ||
| 736 | pr_err("overlayfs: failed to clone lowerpath\n"); | ||
| 737 | goto out_put_upper_mnt; | ||
| 738 | } | ||
| 739 | |||
| 740 | ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); | ||
| 741 | err = PTR_ERR(ufs->workdir); | ||
| 742 | if (IS_ERR(ufs->workdir)) { | ||
| 743 | pr_err("overlayfs: failed to create directory %s/%s\n", | ||
| 744 | ufs->config.workdir, OVL_WORKDIR_NAME); | ||
| 745 | goto out_put_lower_mnt; | ||
| 746 | } | ||
| 747 | |||
| 748 | /* | ||
| 749 | * Make lower_mnt R/O. That way fchmod/fchown on lower file | ||
| 750 | * will fail instead of modifying lower fs. | ||
| 751 | */ | ||
| 752 | ufs->lower_mnt->mnt_flags |= MNT_READONLY; | ||
| 753 | |||
| 754 | /* If the upper fs is r/o, we mark overlayfs r/o too */ | ||
| 755 | if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) | ||
| 756 | sb->s_flags |= MS_RDONLY; | ||
| 757 | |||
| 758 | sb->s_d_op = &ovl_dentry_operations; | ||
| 759 | |||
| 760 | err = -ENOMEM; | ||
| 761 | root_inode = ovl_new_inode(sb, S_IFDIR, oe); | ||
| 762 | if (!root_inode) | ||
| 763 | goto out_put_workdir; | ||
| 764 | |||
| 765 | root_dentry = d_make_root(root_inode); | ||
| 766 | if (!root_dentry) | ||
| 767 | goto out_put_workdir; | ||
| 768 | |||
| 769 | mntput(upperpath.mnt); | ||
| 770 | mntput(lowerpath.mnt); | ||
| 771 | path_put(&workpath); | ||
| 772 | |||
| 773 | oe->__upperdentry = upperpath.dentry; | ||
| 774 | oe->lowerdentry = lowerpath.dentry; | ||
| 775 | |||
| 776 | root_dentry->d_fsdata = oe; | ||
| 777 | |||
| 778 | sb->s_magic = OVERLAYFS_SUPER_MAGIC; | ||
| 779 | sb->s_op = &ovl_super_operations; | ||
| 780 | sb->s_root = root_dentry; | ||
| 781 | sb->s_fs_info = ufs; | ||
| 782 | |||
| 783 | return 0; | ||
| 784 | |||
| 785 | out_put_workdir: | ||
| 786 | dput(ufs->workdir); | ||
| 787 | out_put_lower_mnt: | ||
| 788 | mntput(ufs->lower_mnt); | ||
| 789 | out_put_upper_mnt: | ||
| 790 | mntput(ufs->upper_mnt); | ||
| 791 | out_put_workpath: | ||
| 792 | path_put(&workpath); | ||
| 793 | out_put_lowerpath: | ||
| 794 | path_put(&lowerpath); | ||
| 795 | out_put_upperpath: | ||
| 796 | path_put(&upperpath); | ||
| 797 | out_free_oe: | ||
| 798 | kfree(oe); | ||
| 799 | out_free_config: | ||
| 800 | kfree(ufs->config.lowerdir); | ||
| 801 | kfree(ufs->config.upperdir); | ||
| 802 | kfree(ufs->config.workdir); | ||
| 803 | kfree(ufs); | ||
| 804 | out: | ||
| 805 | return err; | ||
| 806 | } | ||
| 807 | |||
| 808 | static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, | ||
| 809 | const char *dev_name, void *raw_data) | ||
| 810 | { | ||
| 811 | return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); | ||
| 812 | } | ||
| 813 | |||
| 814 | static struct file_system_type ovl_fs_type = { | ||
| 815 | .owner = THIS_MODULE, | ||
| 816 | .name = "overlay", | ||
| 817 | .mount = ovl_mount, | ||
| 818 | .kill_sb = kill_anon_super, | ||
| 819 | }; | ||
| 820 | MODULE_ALIAS_FS("overlay"); | ||
| 821 | |||
| 822 | static int __init ovl_init(void) | ||
| 823 | { | ||
| 824 | return register_filesystem(&ovl_fs_type); | ||
| 825 | } | ||
| 826 | |||
| 827 | static void __exit ovl_exit(void) | ||
| 828 | { | ||
| 829 | unregister_filesystem(&ovl_fs_type); | ||
| 830 | } | ||
| 831 | |||
| 832 | module_init(ovl_init); | ||
| 833 | module_exit(ovl_exit); | ||
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8b663b2d9562..6b4527216a7f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
| @@ -634,7 +634,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type) | |||
| 634 | dqstats_inc(DQST_LOOKUPS); | 634 | dqstats_inc(DQST_LOOKUPS); |
| 635 | err = sb->dq_op->write_dquot(dquot); | 635 | err = sb->dq_op->write_dquot(dquot); |
| 636 | if (!ret && err) | 636 | if (!ret && err) |
| 637 | err = ret; | 637 | ret = err; |
| 638 | dqput(dquot); | 638 | dqput(dquot); |
| 639 | spin_lock(&dq_list_lock); | 639 | spin_lock(&dq_list_lock); |
| 640 | } | 640 | } |
diff --git a/fs/splice.c b/fs/splice.c index f5cb9ba84510..75c6058eabf2 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -1330,6 +1330,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
| 1330 | 1330 | ||
| 1331 | return ret; | 1331 | return ret; |
| 1332 | } | 1332 | } |
| 1333 | EXPORT_SYMBOL(do_splice_direct); | ||
| 1333 | 1334 | ||
| 1334 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | 1335 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, |
| 1335 | struct pipe_inode_info *opipe, | 1336 | struct pipe_inode_info *opipe, |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 92e8f99a5857..281002689d64 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
| @@ -1338,7 +1338,10 @@ xfs_free_file_space( | |||
| 1338 | goto out; | 1338 | goto out; |
| 1339 | } | 1339 | } |
| 1340 | 1340 | ||
| 1341 | 1341 | /* | |
| 1342 | * Preallocate and zero a range of a file. This mechanism has the allocation | ||
| 1343 | * semantics of fallocate and in addition converts data in the range to zeroes. | ||
| 1344 | */ | ||
| 1342 | int | 1345 | int |
| 1343 | xfs_zero_file_space( | 1346 | xfs_zero_file_space( |
| 1344 | struct xfs_inode *ip, | 1347 | struct xfs_inode *ip, |
| @@ -1346,65 +1349,30 @@ xfs_zero_file_space( | |||
| 1346 | xfs_off_t len) | 1349 | xfs_off_t len) |
| 1347 | { | 1350 | { |
| 1348 | struct xfs_mount *mp = ip->i_mount; | 1351 | struct xfs_mount *mp = ip->i_mount; |
| 1349 | uint granularity; | 1352 | uint blksize; |
| 1350 | xfs_off_t start_boundary; | ||
| 1351 | xfs_off_t end_boundary; | ||
| 1352 | int error; | 1353 | int error; |
| 1353 | 1354 | ||
| 1354 | trace_xfs_zero_file_space(ip); | 1355 | trace_xfs_zero_file_space(ip); |
| 1355 | 1356 | ||
| 1356 | granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); | 1357 | blksize = 1 << mp->m_sb.sb_blocklog; |
| 1357 | 1358 | ||
| 1358 | /* | 1359 | /* |
| 1359 | * Round the range of extents we are going to convert inwards. If the | 1360 | * Punch a hole and prealloc the range. We use hole punch rather than |
| 1360 | * offset is aligned, then it doesn't get changed so we zero from the | 1361 | * unwritten extent conversion for two reasons: |
| 1361 | * start of the block offset points to. | 1362 | * |
| 1363 | * 1.) Hole punch handles partial block zeroing for us. | ||
| 1364 | * | ||
| 1365 | * 2.) If prealloc returns ENOSPC, the file range is still zero-valued | ||
| 1366 | * by virtue of the hole punch. | ||
| 1362 | */ | 1367 | */ |
| 1363 | start_boundary = round_up(offset, granularity); | 1368 | error = xfs_free_file_space(ip, offset, len); |
| 1364 | end_boundary = round_down(offset + len, granularity); | 1369 | if (error) |
| 1365 | 1370 | goto out; | |
| 1366 | ASSERT(start_boundary >= offset); | ||
| 1367 | ASSERT(end_boundary <= offset + len); | ||
| 1368 | |||
| 1369 | if (start_boundary < end_boundary - 1) { | ||
| 1370 | /* | ||
| 1371 | * Writeback the range to ensure any inode size updates due to | ||
| 1372 | * appending writes make it to disk (otherwise we could just | ||
| 1373 | * punch out the delalloc blocks). | ||
| 1374 | */ | ||
| 1375 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | ||
| 1376 | start_boundary, end_boundary - 1); | ||
| 1377 | if (error) | ||
| 1378 | goto out; | ||
| 1379 | truncate_pagecache_range(VFS_I(ip), start_boundary, | ||
| 1380 | end_boundary - 1); | ||
| 1381 | |||
| 1382 | /* convert the blocks */ | ||
| 1383 | error = xfs_alloc_file_space(ip, start_boundary, | ||
| 1384 | end_boundary - start_boundary - 1, | ||
| 1385 | XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT); | ||
| 1386 | if (error) | ||
| 1387 | goto out; | ||
| 1388 | |||
| 1389 | /* We've handled the interior of the range, now for the edges */ | ||
| 1390 | if (start_boundary != offset) { | ||
| 1391 | error = xfs_iozero(ip, offset, start_boundary - offset); | ||
| 1392 | if (error) | ||
| 1393 | goto out; | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | if (end_boundary != offset + len) | ||
| 1397 | error = xfs_iozero(ip, end_boundary, | ||
| 1398 | offset + len - end_boundary); | ||
| 1399 | |||
| 1400 | } else { | ||
| 1401 | /* | ||
| 1402 | * It's either a sub-granularity range or the range spanned lies | ||
| 1403 | * partially across two adjacent blocks. | ||
| 1404 | */ | ||
| 1405 | error = xfs_iozero(ip, offset, len); | ||
| 1406 | } | ||
| 1407 | 1371 | ||
| 1372 | error = xfs_alloc_file_space(ip, round_down(offset, blksize), | ||
| 1373 | round_up(offset + len, blksize) - | ||
| 1374 | round_down(offset, blksize), | ||
| 1375 | XFS_BMAPI_PREALLOC); | ||
| 1408 | out: | 1376 | out: |
| 1409 | return error; | 1377 | return error; |
| 1410 | 1378 | ||
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f1deb961a296..894924a5129b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
| @@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk( | |||
| 236 | XFS_WANT_CORRUPTED_RETURN(stat == 1); | 236 | XFS_WANT_CORRUPTED_RETURN(stat == 1); |
| 237 | 237 | ||
| 238 | /* Check if the record contains the inode in request */ | 238 | /* Check if the record contains the inode in request */ |
| 239 | if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) | 239 | if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { |
| 240 | return -EINVAL; | 240 | *icount = 0; |
| 241 | return 0; | ||
| 242 | } | ||
| 241 | 243 | ||
| 242 | idx = agino - irec->ir_startino + 1; | 244 | idx = agino - irec->ir_startino + 1; |
| 243 | if (idx < XFS_INODES_PER_CHUNK && | 245 | if (idx < XFS_INODES_PER_CHUNK && |
| @@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk( | |||
| 262 | 264 | ||
| 263 | #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) | 265 | #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) |
| 264 | 266 | ||
| 267 | struct xfs_bulkstat_agichunk { | ||
| 268 | char __user **ac_ubuffer;/* pointer into user's buffer */ | ||
| 269 | int ac_ubleft; /* bytes left in user's buffer */ | ||
| 270 | int ac_ubelem; /* spaces used in user's buffer */ | ||
| 271 | }; | ||
| 272 | |||
| 265 | /* | 273 | /* |
| 266 | * Process inodes in chunk with a pointer to a formatter function | 274 | * Process inodes in chunk with a pointer to a formatter function |
| 267 | * that will iget the inode and fill in the appropriate structure. | 275 | * that will iget the inode and fill in the appropriate structure. |
| 268 | */ | 276 | */ |
| 269 | int | 277 | static int |
| 270 | xfs_bulkstat_ag_ichunk( | 278 | xfs_bulkstat_ag_ichunk( |
| 271 | struct xfs_mount *mp, | 279 | struct xfs_mount *mp, |
| 272 | xfs_agnumber_t agno, | 280 | xfs_agnumber_t agno, |
| 273 | struct xfs_inobt_rec_incore *irbp, | 281 | struct xfs_inobt_rec_incore *irbp, |
| 274 | bulkstat_one_pf formatter, | 282 | bulkstat_one_pf formatter, |
| 275 | size_t statstruct_size, | 283 | size_t statstruct_size, |
| 276 | struct xfs_bulkstat_agichunk *acp) | 284 | struct xfs_bulkstat_agichunk *acp, |
| 285 | xfs_agino_t *last_agino) | ||
| 277 | { | 286 | { |
| 278 | xfs_ino_t lastino = acp->ac_lastino; | ||
| 279 | char __user **ubufp = acp->ac_ubuffer; | 287 | char __user **ubufp = acp->ac_ubuffer; |
| 280 | int ubleft = acp->ac_ubleft; | 288 | int chunkidx; |
| 281 | int ubelem = acp->ac_ubelem; | ||
| 282 | int chunkidx, clustidx; | ||
| 283 | int error = 0; | 289 | int error = 0; |
| 284 | xfs_agino_t agino; | 290 | xfs_agino_t agino = irbp->ir_startino; |
| 285 | 291 | ||
| 286 | for (agino = irbp->ir_startino, chunkidx = clustidx = 0; | 292 | for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; |
| 287 | XFS_BULKSTAT_UBLEFT(ubleft) && | 293 | chunkidx++, agino++) { |
| 288 | irbp->ir_freecount < XFS_INODES_PER_CHUNK; | 294 | int fmterror; |
| 289 | chunkidx++, clustidx++, agino++) { | ||
| 290 | int fmterror; /* bulkstat formatter result */ | ||
| 291 | int ubused; | 295 | int ubused; |
| 292 | xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino); | ||
| 293 | 296 | ||
| 294 | ASSERT(chunkidx < XFS_INODES_PER_CHUNK); | 297 | /* inode won't fit in buffer, we are done */ |
| 298 | if (acp->ac_ubleft < statstruct_size) | ||
| 299 | break; | ||
| 295 | 300 | ||
| 296 | /* Skip if this inode is free */ | 301 | /* Skip if this inode is free */ |
| 297 | if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { | 302 | if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) |
| 298 | lastino = ino; | ||
| 299 | continue; | 303 | continue; |
| 300 | } | ||
| 301 | |||
| 302 | /* | ||
| 303 | * Count used inodes as free so we can tell when the | ||
| 304 | * chunk is used up. | ||
| 305 | */ | ||
| 306 | irbp->ir_freecount++; | ||
| 307 | 304 | ||
| 308 | /* Get the inode and fill in a single buffer */ | 305 | /* Get the inode and fill in a single buffer */ |
| 309 | ubused = statstruct_size; | 306 | ubused = statstruct_size; |
| 310 | error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); | 307 | error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), |
| 311 | if (fmterror == BULKSTAT_RV_NOTHING) { | 308 | *ubufp, acp->ac_ubleft, &ubused, &fmterror); |
| 312 | if (error && error != -ENOENT && error != -EINVAL) { | 309 | |
| 313 | ubleft = 0; | 310 | if (fmterror == BULKSTAT_RV_GIVEUP || |
| 314 | break; | 311 | (error && error != -ENOENT && error != -EINVAL)) { |
| 315 | } | 312 | acp->ac_ubleft = 0; |
| 316 | lastino = ino; | ||
| 317 | continue; | ||
| 318 | } | ||
| 319 | if (fmterror == BULKSTAT_RV_GIVEUP) { | ||
| 320 | ubleft = 0; | ||
| 321 | ASSERT(error); | 313 | ASSERT(error); |
| 322 | break; | 314 | break; |
| 323 | } | 315 | } |
| 324 | if (*ubufp) | 316 | |
| 325 | *ubufp += ubused; | 317 | /* be careful not to leak error if at end of chunk */ |
| 326 | ubleft -= ubused; | 318 | if (fmterror == BULKSTAT_RV_NOTHING || error) { |
| 327 | ubelem++; | 319 | error = 0; |
| 328 | lastino = ino; | 320 | continue; |
| 321 | } | ||
| 322 | |||
| 323 | *ubufp += ubused; | ||
| 324 | acp->ac_ubleft -= ubused; | ||
| 325 | acp->ac_ubelem++; | ||
| 329 | } | 326 | } |
| 330 | 327 | ||
| 331 | acp->ac_lastino = lastino; | 328 | /* |
| 332 | acp->ac_ubleft = ubleft; | 329 | * Post-update *last_agino. At this point, agino will always point one |
| 333 | acp->ac_ubelem = ubelem; | 330 | * inode past the last inode we processed successfully. Hence we |
| 331 | * substract that inode when setting the *last_agino cursor so that we | ||
| 332 | * return the correct cookie to userspace. On the next bulkstat call, | ||
| 333 | * the inode under the lastino cookie will be skipped as we have already | ||
| 334 | * processed it here. | ||
| 335 | */ | ||
| 336 | *last_agino = agino - 1; | ||
| 334 | 337 | ||
| 335 | return error; | 338 | return error; |
| 336 | } | 339 | } |
| @@ -353,45 +356,33 @@ xfs_bulkstat( | |||
| 353 | xfs_agino_t agino; /* inode # in allocation group */ | 356 | xfs_agino_t agino; /* inode # in allocation group */ |
| 354 | xfs_agnumber_t agno; /* allocation group number */ | 357 | xfs_agnumber_t agno; /* allocation group number */ |
| 355 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ | 358 | xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ |
| 356 | int end_of_ag; /* set if we've seen the ag end */ | ||
| 357 | int error; /* error code */ | ||
| 358 | int fmterror;/* bulkstat formatter result */ | ||
| 359 | int i; /* loop index */ | ||
| 360 | int icount; /* count of inodes good in irbuf */ | ||
| 361 | size_t irbsize; /* size of irec buffer in bytes */ | 359 | size_t irbsize; /* size of irec buffer in bytes */ |
| 362 | xfs_ino_t ino; /* inode number (filesystem) */ | ||
| 363 | xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ | ||
| 364 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ | 360 | xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ |
| 365 | xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ | ||
| 366 | xfs_ino_t lastino; /* last inode number returned */ | ||
| 367 | int nirbuf; /* size of irbuf */ | 361 | int nirbuf; /* size of irbuf */ |
| 368 | int rval; /* return value error code */ | ||
| 369 | int tmp; /* result value from btree calls */ | ||
| 370 | int ubcount; /* size of user's buffer */ | 362 | int ubcount; /* size of user's buffer */ |
| 371 | int ubleft; /* bytes left in user's buffer */ | 363 | struct xfs_bulkstat_agichunk ac; |
| 372 | char __user *ubufp; /* pointer into user's buffer */ | 364 | int error = 0; |
| 373 | int ubelem; /* spaces used in user's buffer */ | ||
| 374 | 365 | ||
| 375 | /* | 366 | /* |
| 376 | * Get the last inode value, see if there's nothing to do. | 367 | * Get the last inode value, see if there's nothing to do. |
| 377 | */ | 368 | */ |
| 378 | ino = (xfs_ino_t)*lastinop; | 369 | agno = XFS_INO_TO_AGNO(mp, *lastinop); |
| 379 | lastino = ino; | 370 | agino = XFS_INO_TO_AGINO(mp, *lastinop); |
| 380 | agno = XFS_INO_TO_AGNO(mp, ino); | ||
| 381 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
| 382 | if (agno >= mp->m_sb.sb_agcount || | 371 | if (agno >= mp->m_sb.sb_agcount || |
| 383 | ino != XFS_AGINO_TO_INO(mp, agno, agino)) { | 372 | *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { |
| 384 | *done = 1; | 373 | *done = 1; |
| 385 | *ubcountp = 0; | 374 | *ubcountp = 0; |
| 386 | return 0; | 375 | return 0; |
| 387 | } | 376 | } |
| 388 | 377 | ||
| 389 | ubcount = *ubcountp; /* statstruct's */ | 378 | ubcount = *ubcountp; /* statstruct's */ |
| 390 | ubleft = ubcount * statstruct_size; /* bytes */ | 379 | ac.ac_ubuffer = &ubuffer; |
| 391 | *ubcountp = ubelem = 0; | 380 | ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; |
| 381 | ac.ac_ubelem = 0; | ||
| 382 | |||
| 383 | *ubcountp = 0; | ||
| 392 | *done = 0; | 384 | *done = 0; |
| 393 | fmterror = 0; | 385 | |
| 394 | ubufp = ubuffer; | ||
| 395 | irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); | 386 | irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); |
| 396 | if (!irbuf) | 387 | if (!irbuf) |
| 397 | return -ENOMEM; | 388 | return -ENOMEM; |
| @@ -402,9 +393,13 @@ xfs_bulkstat( | |||
| 402 | * Loop over the allocation groups, starting from the last | 393 | * Loop over the allocation groups, starting from the last |
| 403 | * inode returned; 0 means start of the allocation group. | 394 | * inode returned; 0 means start of the allocation group. |
| 404 | */ | 395 | */ |
| 405 | rval = 0; | 396 | while (agno < mp->m_sb.sb_agcount) { |
| 406 | while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { | 397 | struct xfs_inobt_rec_incore *irbp = irbuf; |
| 407 | cond_resched(); | 398 | struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; |
| 399 | bool end_of_ag = false; | ||
| 400 | int icount = 0; | ||
| 401 | int stat; | ||
| 402 | |||
| 408 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | 403 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); |
| 409 | if (error) | 404 | if (error) |
| 410 | break; | 405 | break; |
| @@ -414,10 +409,6 @@ xfs_bulkstat( | |||
| 414 | */ | 409 | */ |
| 415 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, | 410 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, |
| 416 | XFS_BTNUM_INO); | 411 | XFS_BTNUM_INO); |
| 417 | irbp = irbuf; | ||
| 418 | irbufend = irbuf + nirbuf; | ||
| 419 | end_of_ag = 0; | ||
| 420 | icount = 0; | ||
| 421 | if (agino > 0) { | 412 | if (agino > 0) { |
| 422 | /* | 413 | /* |
| 423 | * In the middle of an allocation group, we need to get | 414 | * In the middle of an allocation group, we need to get |
| @@ -427,22 +418,23 @@ xfs_bulkstat( | |||
| 427 | 418 | ||
| 428 | error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); | 419 | error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); |
| 429 | if (error) | 420 | if (error) |
| 430 | break; | 421 | goto del_cursor; |
| 431 | if (icount) { | 422 | if (icount) { |
| 432 | irbp->ir_startino = r.ir_startino; | 423 | irbp->ir_startino = r.ir_startino; |
| 433 | irbp->ir_freecount = r.ir_freecount; | 424 | irbp->ir_freecount = r.ir_freecount; |
| 434 | irbp->ir_free = r.ir_free; | 425 | irbp->ir_free = r.ir_free; |
| 435 | irbp++; | 426 | irbp++; |
| 436 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; | ||
| 437 | } | 427 | } |
| 438 | /* Increment to the next record */ | 428 | /* Increment to the next record */ |
| 439 | error = xfs_btree_increment(cur, 0, &tmp); | 429 | error = xfs_btree_increment(cur, 0, &stat); |
| 440 | } else { | 430 | } else { |
| 441 | /* Start of ag. Lookup the first inode chunk */ | 431 | /* Start of ag. Lookup the first inode chunk */ |
| 442 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); | 432 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); |
| 433 | } | ||
| 434 | if (error || stat == 0) { | ||
| 435 | end_of_ag = true; | ||
| 436 | goto del_cursor; | ||
| 443 | } | 437 | } |
| 444 | if (error) | ||
| 445 | break; | ||
| 446 | 438 | ||
| 447 | /* | 439 | /* |
| 448 | * Loop through inode btree records in this ag, | 440 | * Loop through inode btree records in this ag, |
| @@ -451,10 +443,10 @@ xfs_bulkstat( | |||
| 451 | while (irbp < irbufend && icount < ubcount) { | 443 | while (irbp < irbufend && icount < ubcount) { |
| 452 | struct xfs_inobt_rec_incore r; | 444 | struct xfs_inobt_rec_incore r; |
| 453 | 445 | ||
| 454 | error = xfs_inobt_get_rec(cur, &r, &i); | 446 | error = xfs_inobt_get_rec(cur, &r, &stat); |
| 455 | if (error || i == 0) { | 447 | if (error || stat == 0) { |
| 456 | end_of_ag = 1; | 448 | end_of_ag = true; |
| 457 | break; | 449 | goto del_cursor; |
| 458 | } | 450 | } |
| 459 | 451 | ||
| 460 | /* | 452 | /* |
| @@ -469,77 +461,79 @@ xfs_bulkstat( | |||
| 469 | irbp++; | 461 | irbp++; |
| 470 | icount += XFS_INODES_PER_CHUNK - r.ir_freecount; | 462 | icount += XFS_INODES_PER_CHUNK - r.ir_freecount; |
| 471 | } | 463 | } |
| 472 | /* | 464 | error = xfs_btree_increment(cur, 0, &stat); |
| 473 | * Set agino to after this chunk and bump the cursor. | 465 | if (error || stat == 0) { |
| 474 | */ | 466 | end_of_ag = true; |
| 475 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; | 467 | goto del_cursor; |
| 476 | error = xfs_btree_increment(cur, 0, &tmp); | 468 | } |
| 477 | cond_resched(); | 469 | cond_resched(); |
| 478 | } | 470 | } |
| 471 | |||
| 479 | /* | 472 | /* |
| 480 | * Drop the btree buffers and the agi buffer. | 473 | * Drop the btree buffers and the agi buffer as we can't hold any |
| 481 | * We can't hold any of the locks these represent | 474 | * of the locks these represent when calling iget. If there is a |
| 482 | * when calling iget. | 475 | * pending error, then we are done. |
| 483 | */ | 476 | */ |
| 477 | del_cursor: | ||
| 484 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 478 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
| 485 | xfs_buf_relse(agbp); | 479 | xfs_buf_relse(agbp); |
| 480 | if (error) | ||
| 481 | break; | ||
| 486 | /* | 482 | /* |
| 487 | * Now format all the good inodes into the user's buffer. | 483 | * Now format all the good inodes into the user's buffer. The |
| 484 | * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer | ||
| 485 | * for the next loop iteration. | ||
| 488 | */ | 486 | */ |
| 489 | irbufend = irbp; | 487 | irbufend = irbp; |
| 490 | for (irbp = irbuf; | 488 | for (irbp = irbuf; |
| 491 | irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { | 489 | irbp < irbufend && ac.ac_ubleft >= statstruct_size; |
| 492 | struct xfs_bulkstat_agichunk ac; | 490 | irbp++) { |
| 493 | |||
| 494 | ac.ac_lastino = lastino; | ||
| 495 | ac.ac_ubuffer = &ubuffer; | ||
| 496 | ac.ac_ubleft = ubleft; | ||
| 497 | ac.ac_ubelem = ubelem; | ||
| 498 | error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, | 491 | error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, |
| 499 | formatter, statstruct_size, &ac); | 492 | formatter, statstruct_size, &ac, |
| 493 | &agino); | ||
| 500 | if (error) | 494 | if (error) |
| 501 | rval = error; | 495 | break; |
| 502 | |||
| 503 | lastino = ac.ac_lastino; | ||
| 504 | ubleft = ac.ac_ubleft; | ||
| 505 | ubelem = ac.ac_ubelem; | ||
| 506 | 496 | ||
| 507 | cond_resched(); | 497 | cond_resched(); |
| 508 | } | 498 | } |
| 499 | |||
| 509 | /* | 500 | /* |
| 510 | * Set up for the next loop iteration. | 501 | * If we've run out of space or had a formatting error, we |
| 502 | * are now done | ||
| 511 | */ | 503 | */ |
| 512 | if (XFS_BULKSTAT_UBLEFT(ubleft)) { | 504 | if (ac.ac_ubleft < statstruct_size || error) |
| 513 | if (end_of_ag) { | ||
| 514 | agno++; | ||
| 515 | agino = 0; | ||
| 516 | } else | ||
| 517 | agino = XFS_INO_TO_AGINO(mp, lastino); | ||
| 518 | } else | ||
| 519 | break; | 505 | break; |
| 506 | |||
| 507 | if (end_of_ag) { | ||
| 508 | agno++; | ||
| 509 | agino = 0; | ||
| 510 | } | ||
| 520 | } | 511 | } |
| 521 | /* | 512 | /* |
| 522 | * Done, we're either out of filesystem or space to put the data. | 513 | * Done, we're either out of filesystem or space to put the data. |
| 523 | */ | 514 | */ |
| 524 | kmem_free(irbuf); | 515 | kmem_free(irbuf); |
| 525 | *ubcountp = ubelem; | 516 | *ubcountp = ac.ac_ubelem; |
| 517 | |||
| 526 | /* | 518 | /* |
| 527 | * Found some inodes, return them now and return the error next time. | 519 | * We found some inodes, so clear the error status and return them. |
| 520 | * The lastino pointer will point directly at the inode that triggered | ||
| 521 | * any error that occurred, so on the next call the error will be | ||
| 522 | * triggered again and propagated to userspace as there will be no | ||
| 523 | * formatted inodes in the buffer. | ||
| 528 | */ | 524 | */ |
| 529 | if (ubelem) | 525 | if (ac.ac_ubelem) |
| 530 | rval = 0; | 526 | error = 0; |
| 531 | if (agno >= mp->m_sb.sb_agcount) { | 527 | |
| 532 | /* | 528 | /* |
| 533 | * If we ran out of filesystem, mark lastino as off | 529 | * If we ran out of filesystem, lastino will point off the end of |
| 534 | * the end of the filesystem, so the next call | 530 | * the filesystem so the next call will return immediately. |
| 535 | * will return immediately. | 531 | */ |
| 536 | */ | 532 | *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); |
| 537 | *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0); | 533 | if (agno >= mp->m_sb.sb_agcount) |
| 538 | *done = 1; | 534 | *done = 1; |
| 539 | } else | ||
| 540 | *lastinop = (xfs_ino_t)lastino; | ||
| 541 | 535 | ||
| 542 | return rval; | 536 | return error; |
| 543 | } | 537 | } |
| 544 | 538 | ||
| 545 | int | 539 | int |
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index aaed08022eb9..6ea8b3912fa4 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h | |||
| @@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, | |||
| 30 | int *ubused, | 30 | int *ubused, |
| 31 | int *stat); | 31 | int *stat); |
| 32 | 32 | ||
| 33 | struct xfs_bulkstat_agichunk { | ||
| 34 | xfs_ino_t ac_lastino; /* last inode returned */ | ||
| 35 | char __user **ac_ubuffer;/* pointer into user's buffer */ | ||
| 36 | int ac_ubleft; /* bytes left in user's buffer */ | ||
| 37 | int ac_ubelem; /* spaces used in user's buffer */ | ||
| 38 | }; | ||
| 39 | |||
| 40 | int | ||
| 41 | xfs_bulkstat_ag_ichunk( | ||
| 42 | struct xfs_mount *mp, | ||
| 43 | xfs_agnumber_t agno, | ||
| 44 | struct xfs_inobt_rec_incore *irbp, | ||
| 45 | bulkstat_one_pf formatter, | ||
| 46 | size_t statstruct_size, | ||
| 47 | struct xfs_bulkstat_agichunk *acp); | ||
| 48 | |||
| 49 | /* | 33 | /* |
| 50 | * Values for stat return value. | 34 | * Values for stat return value. |
| 51 | */ | 35 | */ |
