diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Makefile | 2 | ||||
-rw-r--r-- | fs/ext4/acl.c | 1 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 11 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 15 | ||||
-rw-r--r-- | fs/ext4/dir.c | 25 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 265 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 65 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 71 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 56 | ||||
-rw-r--r-- | fs/ext4/extents.c | 386 | ||||
-rw-r--r-- | fs/ext4/file.c | 49 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 88 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 141 | ||||
-rw-r--r-- | fs/ext4/inode.c | 838 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 698 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 4 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 32 | ||||
-rw-r--r-- | fs/ext4/namei.c | 103 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 430 | ||||
-rw-r--r-- | fs/ext4/resize.c | 60 | ||||
-rw-r--r-- | fs/ext4/super.c | 896 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 19 | ||||
-rw-r--r-- | fs/ext4/xattr.h | 10 |
23 files changed, 2836 insertions, 1429 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 8867b2a1e5fe..c947e36eda6c 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_EXT4_FS) += ext4.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o |
10 | 10 | ||
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index feaf498feaa6..5e2ed4504ead 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -204,6 +204,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, | |||
204 | return error; | 204 | return error; |
205 | else { | 205 | else { |
206 | inode->i_mode = mode; | 206 | inode->i_mode = mode; |
207 | inode->i_ctime = ext4_current_time(inode); | ||
207 | ext4_mark_inode_dirty(handle, inode); | 208 | ext4_mark_inode_dirty(handle, inode); |
208 | if (error == 0) | 209 | if (error == 0) |
209 | acl = NULL; | 210 | acl = NULL; |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 95b7594c76f9..14c3af26c671 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
171 | * less than the blocksize * 8 ( which is the size | 171 | * less than the blocksize * 8 ( which is the size |
172 | * of bitmap ), set rest of the block bitmap to 1 | 172 | * of bitmap ), set rest of the block bitmap to 1 |
173 | */ | 173 | */ |
174 | mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); | 174 | ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8, |
175 | bh->b_data); | ||
175 | } | 176 | } |
176 | return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); | 177 | return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); |
177 | } | 178 | } |
@@ -377,14 +378,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
377 | ext4_grpblk_t bit; | 378 | ext4_grpblk_t bit; |
378 | unsigned int i; | 379 | unsigned int i; |
379 | struct ext4_group_desc *desc; | 380 | struct ext4_group_desc *desc; |
380 | struct ext4_super_block *es; | 381 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
381 | struct ext4_sb_info *sbi; | ||
382 | int err = 0, ret, blk_free_count; | 382 | int err = 0, ret, blk_free_count; |
383 | ext4_grpblk_t blocks_freed; | 383 | ext4_grpblk_t blocks_freed; |
384 | struct ext4_group_info *grp; | 384 | struct ext4_group_info *grp; |
385 | 385 | ||
386 | sbi = EXT4_SB(sb); | ||
387 | es = sbi->s_es; | ||
388 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | 386 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); |
389 | 387 | ||
390 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 388 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -477,7 +475,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
477 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | 475 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); |
478 | if (!err) | 476 | if (!err) |
479 | err = ret; | 477 | err = ret; |
480 | sb->s_dirt = 1; | ||
481 | 478 | ||
482 | error_return: | 479 | error_return: |
483 | brelse(bitmap_bh); | 480 | brelse(bitmap_bh); |
@@ -493,7 +490,7 @@ error_return: | |||
493 | * Check if filesystem has nblocks free & available for allocation. | 490 | * Check if filesystem has nblocks free & available for allocation. |
494 | * On success return 1, return 0 on failure. | 491 | * On success return 1, return 0 on failure. |
495 | */ | 492 | */ |
496 | int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) | 493 | static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) |
497 | { | 494 | { |
498 | s64 free_blocks, dirty_blocks, root_blocks; | 495 | s64 free_blocks, dirty_blocks, root_blocks; |
499 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | 496 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 5b6973fbf1bd..fac90f3fba80 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -29,16 +29,15 @@ struct ext4_system_zone { | |||
29 | 29 | ||
30 | static struct kmem_cache *ext4_system_zone_cachep; | 30 | static struct kmem_cache *ext4_system_zone_cachep; |
31 | 31 | ||
32 | int __init init_ext4_system_zone(void) | 32 | int __init ext4_init_system_zone(void) |
33 | { | 33 | { |
34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, | 34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0); |
35 | SLAB_RECLAIM_ACCOUNT); | ||
36 | if (ext4_system_zone_cachep == NULL) | 35 | if (ext4_system_zone_cachep == NULL) |
37 | return -ENOMEM; | 36 | return -ENOMEM; |
38 | return 0; | 37 | return 0; |
39 | } | 38 | } |
40 | 39 | ||
41 | void exit_ext4_system_zone(void) | 40 | void ext4_exit_system_zone(void) |
42 | { | 41 | { |
43 | kmem_cache_destroy(ext4_system_zone_cachep); | 42 | kmem_cache_destroy(ext4_system_zone_cachep); |
44 | } | 43 | } |
@@ -229,16 +228,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, | |||
229 | 228 | ||
230 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || | 229 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || |
231 | (start_blk + count < start_blk) || | 230 | (start_blk + count < start_blk) || |
232 | (start_blk + count > ext4_blocks_count(sbi->s_es))) | 231 | (start_blk + count > ext4_blocks_count(sbi->s_es))) { |
232 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | ||
233 | return 0; | 233 | return 0; |
234 | } | ||
234 | while (n) { | 235 | while (n) { |
235 | entry = rb_entry(n, struct ext4_system_zone, node); | 236 | entry = rb_entry(n, struct ext4_system_zone, node); |
236 | if (start_blk + count - 1 < entry->start_blk) | 237 | if (start_blk + count - 1 < entry->start_blk) |
237 | n = n->rb_left; | 238 | n = n->rb_left; |
238 | else if (start_blk >= (entry->start_blk + entry->count)) | 239 | else if (start_blk >= (entry->start_blk + entry->count)) |
239 | n = n->rb_right; | 240 | n = n->rb_right; |
240 | else | 241 | else { |
242 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | ||
241 | return 0; | 243 | return 0; |
244 | } | ||
242 | } | 245 | } |
243 | return 1; | 246 | return 1; |
244 | } | 247 | } |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ea5e6cb7e2a5..ece76fb6a40c 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode, | |||
39 | struct file *filp); | 39 | struct file *filp); |
40 | 40 | ||
41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
42 | .llseek = generic_file_llseek, | 42 | .llseek = ext4_llseek, |
43 | .read = generic_read_dir, | 43 | .read = generic_read_dir, |
44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ | 44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ |
45 | .unlocked_ioctl = ext4_ioctl, | 45 | .unlocked_ioctl = ext4_ioctl, |
@@ -61,10 +61,11 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | 63 | ||
64 | int ext4_check_dir_entry(const char *function, struct inode *dir, | 64 | int __ext4_check_dir_entry(const char *function, unsigned int line, |
65 | struct ext4_dir_entry_2 *de, | 65 | struct inode *dir, |
66 | struct buffer_head *bh, | 66 | struct ext4_dir_entry_2 *de, |
67 | unsigned int offset) | 67 | struct buffer_head *bh, |
68 | unsigned int offset) | ||
68 | { | 69 | { |
69 | const char *error_msg = NULL; | 70 | const char *error_msg = NULL; |
70 | const int rlen = ext4_rec_len_from_disk(de->rec_len, | 71 | const int rlen = ext4_rec_len_from_disk(de->rec_len, |
@@ -83,11 +84,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, | |||
83 | error_msg = "inode out of bounds"; | 84 | error_msg = "inode out of bounds"; |
84 | 85 | ||
85 | if (error_msg != NULL) | 86 | if (error_msg != NULL) |
86 | ext4_error_inode(function, dir, | 87 | ext4_error_inode(dir, function, line, bh->b_blocknr, |
87 | "bad entry in directory: %s - block=%llu" | 88 | "bad entry in directory: %s - " |
88 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", | 89 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", |
89 | error_msg, (unsigned long long) bh->b_blocknr, | 90 | error_msg, (unsigned) (offset%bh->b_size), offset, |
90 | (unsigned) (offset%bh->b_size), offset, | ||
91 | le32_to_cpu(de->inode), | 91 | le32_to_cpu(de->inode), |
92 | rlen, de->name_len); | 92 | rlen, de->name_len); |
93 | return error_msg == NULL ? 1 : 0; | 93 | return error_msg == NULL ? 1 : 0; |
@@ -121,7 +121,8 @@ static int ext4_readdir(struct file *filp, | |||
121 | * We don't set the inode dirty flag since it's not | 121 | * We don't set the inode dirty flag since it's not |
122 | * critical that it get flushed back to the disk. | 122 | * critical that it get flushed back to the disk. |
123 | */ | 123 | */ |
124 | ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); | 124 | ext4_clear_inode_flag(filp->f_path.dentry->d_inode, |
125 | EXT4_INODE_INDEX); | ||
125 | } | 126 | } |
126 | stored = 0; | 127 | stored = 0; |
127 | offset = filp->f_pos & (sb->s_blocksize - 1); | 128 | offset = filp->f_pos & (sb->s_blocksize - 1); |
@@ -193,7 +194,7 @@ revalidate: | |||
193 | while (!error && filp->f_pos < inode->i_size | 194 | while (!error && filp->f_pos < inode->i_size |
194 | && offset < sb->s_blocksize) { | 195 | && offset < sb->s_blocksize) { |
195 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 196 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
196 | if (!ext4_check_dir_entry("ext4_readdir", inode, de, | 197 | if (!ext4_check_dir_entry(inode, de, |
197 | bh, offset)) { | 198 | bh, offset)) { |
198 | /* | 199 | /* |
199 | * On error, skip the f_pos to the next block | 200 | * On error, skip the f_pos to the next block |
@@ -343,7 +344,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
343 | struct dir_private_info *info; | 344 | struct dir_private_info *info; |
344 | int len; | 345 | int len; |
345 | 346 | ||
346 | info = (struct dir_private_info *) dir_file->private_data; | 347 | info = dir_file->private_data; |
347 | p = &info->root.rb_node; | 348 | p = &info->root.rb_node; |
348 | 349 | ||
349 | /* Create and allocate the fname structure */ | 350 | /* Create and allocate the fname structure */ |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 19a4de57128a..8b5dd6369f82 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -57,10 +57,13 @@ | |||
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | 59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ |
60 | ext4_error_inode(__func__, (inode), (fmt), ## a) | 60 | ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) |
61 | |||
62 | #define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ | ||
63 | ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) | ||
61 | 64 | ||
62 | #define EXT4_ERROR_FILE(file, fmt, a...) \ | 65 | #define EXT4_ERROR_FILE(file, fmt, a...) \ |
63 | ext4_error_file(__func__, (file), (fmt), ## a) | 66 | ext4_error_file(__func__, __LINE__, (file), (fmt), ## a) |
64 | 67 | ||
65 | /* data type for block offset of block group */ | 68 | /* data type for block offset of block group */ |
66 | typedef int ext4_grpblk_t; | 69 | typedef int ext4_grpblk_t; |
@@ -165,17 +168,42 @@ struct mpage_da_data { | |||
165 | int pages_written; | 168 | int pages_written; |
166 | int retval; | 169 | int retval; |
167 | }; | 170 | }; |
168 | #define EXT4_IO_UNWRITTEN 0x1 | 171 | |
172 | /* | ||
173 | * Flags for ext4_io_end->flags | ||
174 | */ | ||
175 | #define EXT4_IO_END_UNWRITTEN 0x0001 | ||
176 | #define EXT4_IO_END_ERROR 0x0002 | ||
177 | |||
178 | struct ext4_io_page { | ||
179 | struct page *p_page; | ||
180 | int p_count; | ||
181 | }; | ||
182 | |||
183 | #define MAX_IO_PAGES 128 | ||
184 | |||
169 | typedef struct ext4_io_end { | 185 | typedef struct ext4_io_end { |
170 | struct list_head list; /* per-file finished AIO list */ | 186 | struct list_head list; /* per-file finished IO list */ |
171 | struct inode *inode; /* file being written to */ | 187 | struct inode *inode; /* file being written to */ |
172 | unsigned int flag; /* unwritten or not */ | 188 | unsigned int flag; /* unwritten or not */ |
173 | struct page *page; /* page struct for buffer write */ | 189 | struct page *page; /* page struct for buffer write */ |
174 | loff_t offset; /* offset in the file */ | 190 | loff_t offset; /* offset in the file */ |
175 | ssize_t size; /* size of the extent */ | 191 | ssize_t size; /* size of the extent */ |
176 | struct work_struct work; /* data work queue */ | 192 | struct work_struct work; /* data work queue */ |
193 | struct kiocb *iocb; /* iocb struct for AIO */ | ||
194 | int result; /* error value for AIO */ | ||
195 | int num_io_pages; | ||
196 | struct ext4_io_page *pages[MAX_IO_PAGES]; | ||
177 | } ext4_io_end_t; | 197 | } ext4_io_end_t; |
178 | 198 | ||
199 | struct ext4_io_submit { | ||
200 | int io_op; | ||
201 | struct bio *io_bio; | ||
202 | ext4_io_end_t *io_end; | ||
203 | struct ext4_io_page *io_page; | ||
204 | sector_t io_next_block; | ||
205 | }; | ||
206 | |||
179 | /* | 207 | /* |
180 | * Special inodes numbers | 208 | * Special inodes numbers |
181 | */ | 209 | */ |
@@ -200,6 +228,7 @@ typedef struct ext4_io_end { | |||
200 | #define EXT4_MIN_BLOCK_SIZE 1024 | 228 | #define EXT4_MIN_BLOCK_SIZE 1024 |
201 | #define EXT4_MAX_BLOCK_SIZE 65536 | 229 | #define EXT4_MAX_BLOCK_SIZE 65536 |
202 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 | 230 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 |
231 | #define EXT4_MAX_BLOCK_LOG_SIZE 16 | ||
203 | #ifdef __KERNEL__ | 232 | #ifdef __KERNEL__ |
204 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) | 233 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) |
205 | #else | 234 | #else |
@@ -460,7 +489,7 @@ struct ext4_new_group_data { | |||
460 | }; | 489 | }; |
461 | 490 | ||
462 | /* | 491 | /* |
463 | * Flags used by ext4_get_blocks() | 492 | * Flags used by ext4_map_blocks() |
464 | */ | 493 | */ |
465 | /* Allocate any needed blocks and/or convert an unitialized | 494 | /* Allocate any needed blocks and/or convert an unitialized |
466 | extent to be an initialized ext4 */ | 495 | extent to be an initialized ext4 */ |
@@ -873,7 +902,6 @@ struct ext4_inode_info { | |||
873 | #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ | 902 | #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ |
874 | #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ | 903 | #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ |
875 | #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ | 904 | #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ |
876 | #define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ | ||
877 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 905 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
878 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 906 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
879 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 907 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
@@ -885,6 +913,7 @@ struct ext4_inode_info { | |||
885 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 913 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
886 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 914 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
887 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ | 915 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ |
916 | #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ | ||
888 | 917 | ||
889 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 918 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
890 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt | 919 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt |
@@ -982,7 +1011,7 @@ struct ext4_super_block { | |||
982 | __le32 s_last_orphan; /* start of list of inodes to delete */ | 1011 | __le32 s_last_orphan; /* start of list of inodes to delete */ |
983 | __le32 s_hash_seed[4]; /* HTREE hash seed */ | 1012 | __le32 s_hash_seed[4]; /* HTREE hash seed */ |
984 | __u8 s_def_hash_version; /* Default hash version to use */ | 1013 | __u8 s_def_hash_version; /* Default hash version to use */ |
985 | __u8 s_reserved_char_pad; | 1014 | __u8 s_jnl_backup_type; |
986 | __le16 s_desc_size; /* size of group descriptor */ | 1015 | __le16 s_desc_size; /* size of group descriptor */ |
987 | /*100*/ __le32 s_default_mount_opts; | 1016 | /*100*/ __le32 s_default_mount_opts; |
988 | __le32 s_first_meta_bg; /* First metablock block group */ | 1017 | __le32 s_first_meta_bg; /* First metablock block group */ |
@@ -1000,12 +1029,34 @@ struct ext4_super_block { | |||
1000 | __le64 s_mmp_block; /* Block for multi-mount protection */ | 1029 | __le64 s_mmp_block; /* Block for multi-mount protection */ |
1001 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 1030 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
1002 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ | 1031 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
1003 | __u8 s_reserved_char_pad2; | 1032 | __u8 s_reserved_char_pad; |
1004 | __le16 s_reserved_pad; | 1033 | __le16 s_reserved_pad; |
1005 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ | 1034 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ |
1006 | __u32 s_reserved[160]; /* Padding to the end of the block */ | 1035 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ |
1036 | __le32 s_snapshot_id; /* sequential ID of active snapshot */ | ||
1037 | __le64 s_snapshot_r_blocks_count; /* reserved blocks for active | ||
1038 | snapshot's future use */ | ||
1039 | __le32 s_snapshot_list; /* inode number of the head of the | ||
1040 | on-disk snapshot list */ | ||
1041 | #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count) | ||
1042 | __le32 s_error_count; /* number of fs errors */ | ||
1043 | __le32 s_first_error_time; /* first time an error happened */ | ||
1044 | __le32 s_first_error_ino; /* inode involved in first error */ | ||
1045 | __le64 s_first_error_block; /* block involved of first error */ | ||
1046 | __u8 s_first_error_func[32]; /* function where the error happened */ | ||
1047 | __le32 s_first_error_line; /* line number where error happened */ | ||
1048 | __le32 s_last_error_time; /* most recent time of an error */ | ||
1049 | __le32 s_last_error_ino; /* inode involved in last error */ | ||
1050 | __le32 s_last_error_line; /* line number where error happened */ | ||
1051 | __le64 s_last_error_block; /* block involved of last error */ | ||
1052 | __u8 s_last_error_func[32]; /* function where the error happened */ | ||
1053 | #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) | ||
1054 | __u8 s_mount_opts[64]; | ||
1055 | __le32 s_reserved[112]; /* Padding to the end of the block */ | ||
1007 | }; | 1056 | }; |
1008 | 1057 | ||
1058 | #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) | ||
1059 | |||
1009 | #ifdef __KERNEL__ | 1060 | #ifdef __KERNEL__ |
1010 | 1061 | ||
1011 | /* | 1062 | /* |
@@ -1061,7 +1112,6 @@ struct ext4_sb_info { | |||
1061 | struct completion s_kobj_unregister; | 1112 | struct completion s_kobj_unregister; |
1062 | 1113 | ||
1063 | /* Journaling */ | 1114 | /* Journaling */ |
1064 | struct inode *s_journal_inode; | ||
1065 | struct journal_s *s_journal; | 1115 | struct journal_s *s_journal; |
1066 | struct list_head s_orphan; | 1116 | struct list_head s_orphan; |
1067 | struct mutex s_orphan_lock; | 1117 | struct mutex s_orphan_lock; |
@@ -1094,10 +1144,7 @@ struct ext4_sb_info { | |||
1094 | /* for buddy allocator */ | 1144 | /* for buddy allocator */ |
1095 | struct ext4_group_info ***s_group_info; | 1145 | struct ext4_group_info ***s_group_info; |
1096 | struct inode *s_buddy_cache; | 1146 | struct inode *s_buddy_cache; |
1097 | long s_blocks_reserved; | ||
1098 | spinlock_t s_reserve_lock; | ||
1099 | spinlock_t s_md_lock; | 1147 | spinlock_t s_md_lock; |
1100 | tid_t s_last_transaction; | ||
1101 | unsigned short *s_mb_offsets; | 1148 | unsigned short *s_mb_offsets; |
1102 | unsigned int *s_mb_maxs; | 1149 | unsigned int *s_mb_maxs; |
1103 | 1150 | ||
@@ -1115,7 +1162,6 @@ struct ext4_sb_info { | |||
1115 | unsigned long s_mb_last_start; | 1162 | unsigned long s_mb_last_start; |
1116 | 1163 | ||
1117 | /* stats for buddy allocator */ | 1164 | /* stats for buddy allocator */ |
1118 | spinlock_t s_mb_pa_lock; | ||
1119 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | 1165 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ |
1120 | atomic_t s_bal_success; /* we found long enough chunks */ | 1166 | atomic_t s_bal_success; /* we found long enough chunks */ |
1121 | atomic_t s_bal_allocated; /* in blocks */ | 1167 | atomic_t s_bal_allocated; /* in blocks */ |
@@ -1143,6 +1189,14 @@ struct ext4_sb_info { | |||
1143 | 1189 | ||
1144 | /* workqueue for dio unwritten */ | 1190 | /* workqueue for dio unwritten */ |
1145 | struct workqueue_struct *dio_unwritten_wq; | 1191 | struct workqueue_struct *dio_unwritten_wq; |
1192 | |||
1193 | /* timer for periodic error stats printing */ | ||
1194 | struct timer_list s_err_report; | ||
1195 | |||
1196 | /* Lazy inode table initialization info */ | ||
1197 | struct ext4_li_request *s_li_request; | ||
1198 | /* Wait multiplier for lazy initialization thread */ | ||
1199 | unsigned int s_li_wait_mult; | ||
1146 | }; | 1200 | }; |
1147 | 1201 | ||
1148 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1202 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1313,6 +1367,10 @@ EXT4_INODE_BIT_FNS(state, state_flags) | |||
1313 | #define EXT4_DEFM_JMODE_DATA 0x0020 | 1367 | #define EXT4_DEFM_JMODE_DATA 0x0020 |
1314 | #define EXT4_DEFM_JMODE_ORDERED 0x0040 | 1368 | #define EXT4_DEFM_JMODE_ORDERED 0x0040 |
1315 | #define EXT4_DEFM_JMODE_WBACK 0x0060 | 1369 | #define EXT4_DEFM_JMODE_WBACK 0x0060 |
1370 | #define EXT4_DEFM_NOBARRIER 0x0100 | ||
1371 | #define EXT4_DEFM_BLOCK_VALIDITY 0x0200 | ||
1372 | #define EXT4_DEFM_DISCARD 0x0400 | ||
1373 | #define EXT4_DEFM_NODELALLOC 0x0800 | ||
1316 | 1374 | ||
1317 | /* | 1375 | /* |
1318 | * Default journal batch times | 1376 | * Default journal batch times |
@@ -1379,6 +1437,43 @@ struct ext4_dir_entry_2 { | |||
1379 | #define EXT4_MAX_REC_LEN ((1<<16)-1) | 1437 | #define EXT4_MAX_REC_LEN ((1<<16)-1) |
1380 | 1438 | ||
1381 | /* | 1439 | /* |
1440 | * If we ever get support for fs block sizes > page_size, we'll need | ||
1441 | * to remove the #if statements in the next two functions... | ||
1442 | */ | ||
1443 | static inline unsigned int | ||
1444 | ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) | ||
1445 | { | ||
1446 | unsigned len = le16_to_cpu(dlen); | ||
1447 | |||
1448 | #if (PAGE_CACHE_SIZE >= 65536) | ||
1449 | if (len == EXT4_MAX_REC_LEN || len == 0) | ||
1450 | return blocksize; | ||
1451 | return (len & 65532) | ((len & 3) << 16); | ||
1452 | #else | ||
1453 | return len; | ||
1454 | #endif | ||
1455 | } | ||
1456 | |||
1457 | static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | ||
1458 | { | ||
1459 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) | ||
1460 | BUG(); | ||
1461 | #if (PAGE_CACHE_SIZE >= 65536) | ||
1462 | if (len < 65536) | ||
1463 | return cpu_to_le16(len); | ||
1464 | if (len == blocksize) { | ||
1465 | if (blocksize == 65536) | ||
1466 | return cpu_to_le16(EXT4_MAX_REC_LEN); | ||
1467 | else | ||
1468 | return cpu_to_le16(0); | ||
1469 | } | ||
1470 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); | ||
1471 | #else | ||
1472 | return cpu_to_le16(len); | ||
1473 | #endif | ||
1474 | } | ||
1475 | |||
1476 | /* | ||
1382 | * Hash Tree Directory indexing | 1477 | * Hash Tree Directory indexing |
1383 | * (c) Daniel Phillips, 2001 | 1478 | * (c) Daniel Phillips, 2001 |
1384 | */ | 1479 | */ |
@@ -1463,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
1463 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 1558 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
1464 | ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); | 1559 | ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); |
1465 | 1560 | ||
1466 | extern struct proc_dir_entry *ext4_proc_root; | 1561 | /* |
1562 | * Timeout and state flag for lazy initialization inode thread. | ||
1563 | */ | ||
1564 | #define EXT4_DEF_LI_WAIT_MULT 10 | ||
1565 | #define EXT4_DEF_LI_MAX_START_DELAY 5 | ||
1566 | #define EXT4_LAZYINIT_QUIT 0x0001 | ||
1567 | #define EXT4_LAZYINIT_RUNNING 0x0002 | ||
1568 | |||
1569 | /* | ||
1570 | * Lazy inode table initialization info | ||
1571 | */ | ||
1572 | struct ext4_lazy_init { | ||
1573 | unsigned long li_state; | ||
1574 | |||
1575 | wait_queue_head_t li_wait_daemon; | ||
1576 | wait_queue_head_t li_wait_task; | ||
1577 | struct timer_list li_timer; | ||
1578 | struct task_struct *li_task; | ||
1579 | |||
1580 | struct list_head li_request_list; | ||
1581 | struct mutex li_list_mtx; | ||
1582 | }; | ||
1583 | |||
1584 | struct ext4_li_request { | ||
1585 | struct super_block *lr_super; | ||
1586 | struct ext4_sb_info *lr_sbi; | ||
1587 | ext4_group_t lr_next_group; | ||
1588 | struct list_head lr_request; | ||
1589 | unsigned long lr_next_sched; | ||
1590 | unsigned long lr_timeout; | ||
1591 | }; | ||
1592 | |||
1593 | struct ext4_features { | ||
1594 | struct kobject f_kobj; | ||
1595 | struct completion f_kobj_unregister; | ||
1596 | }; | ||
1467 | 1597 | ||
1468 | /* | 1598 | /* |
1469 | * Function prototypes | 1599 | * Function prototypes |
@@ -1491,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb, | |||
1491 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | 1621 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
1492 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 1622 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
1493 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | 1623 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
1494 | extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | ||
1495 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | 1624 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
1496 | ext4_fsblk_t block, unsigned long count); | 1625 | ext4_fsblk_t block, unsigned long count); |
1497 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); | 1626 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
@@ -1510,9 +1639,11 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb, | |||
1510 | ext4_init_block_bitmap(sb, NULL, group, desc) | 1639 | ext4_init_block_bitmap(sb, NULL, group, desc) |
1511 | 1640 | ||
1512 | /* dir.c */ | 1641 | /* dir.c */ |
1513 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1642 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, |
1514 | struct ext4_dir_entry_2 *, | 1643 | struct ext4_dir_entry_2 *, |
1515 | struct buffer_head *, unsigned int); | 1644 | struct buffer_head *, unsigned int); |
1645 | #define ext4_check_dir_entry(dir, de, bh, offset) \ | ||
1646 | __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset)) | ||
1516 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | 1647 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, |
1517 | __u32 minor_hash, | 1648 | __u32 minor_hash, |
1518 | struct ext4_dir_entry_2 *dirent); | 1649 | struct ext4_dir_entry_2 *dirent); |
@@ -1533,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); | |||
1533 | extern unsigned long ext4_count_free_inodes(struct super_block *); | 1664 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1534 | extern unsigned long ext4_count_dirs(struct super_block *); | 1665 | extern unsigned long ext4_count_dirs(struct super_block *); |
1535 | extern void ext4_check_inodes_bitmap(struct super_block *); | 1666 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1536 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | 1667 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
1537 | struct buffer_head *bh, | 1668 | extern int ext4_init_inode_table(struct super_block *sb, |
1538 | ext4_group_t group, | 1669 | ext4_group_t group, int barrier); |
1539 | struct ext4_group_desc *desc); | ||
1540 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | ||
1541 | 1670 | ||
1542 | /* mballoc.c */ | 1671 | /* mballoc.c */ |
1543 | extern long ext4_mb_stats; | 1672 | extern long ext4_mb_stats; |
@@ -1548,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | |||
1548 | struct ext4_allocation_request *, int *); | 1677 | struct ext4_allocation_request *, int *); |
1549 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1678 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
1550 | extern void ext4_discard_preallocations(struct inode *); | 1679 | extern void ext4_discard_preallocations(struct inode *); |
1551 | extern int __init init_ext4_mballoc(void); | 1680 | extern int __init ext4_init_mballoc(void); |
1552 | extern void exit_ext4_mballoc(void); | 1681 | extern void ext4_exit_mballoc(void); |
1553 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | 1682 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
1554 | struct buffer_head *bh, ext4_fsblk_t block, | 1683 | struct buffer_head *bh, ext4_fsblk_t block, |
1555 | unsigned long count, int flags); | 1684 | unsigned long count, int flags); |
1556 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1685 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1557 | ext4_group_t i, struct ext4_group_desc *desc); | 1686 | ext4_group_t i, struct ext4_group_desc *desc); |
1558 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1687 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
1559 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1688 | |
1560 | ext4_group_t, int); | ||
1561 | /* inode.c */ | 1689 | /* inode.c */ |
1562 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 1690 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, |
1563 | ext4_lblk_t, int, int *); | 1691 | ext4_lblk_t, int, int *); |
@@ -1571,7 +1699,8 @@ extern int ext4_write_inode(struct inode *, struct writeback_control *); | |||
1571 | extern int ext4_setattr(struct dentry *, struct iattr *); | 1699 | extern int ext4_setattr(struct dentry *, struct iattr *); |
1572 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1700 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1573 | struct kstat *stat); | 1701 | struct kstat *stat); |
1574 | extern void ext4_delete_inode(struct inode *); | 1702 | extern void ext4_evict_inode(struct inode *); |
1703 | extern void ext4_clear_inode(struct inode *); | ||
1575 | extern int ext4_sync_inode(handle_t *, struct inode *); | 1704 | extern int ext4_sync_inode(handle_t *, struct inode *); |
1576 | extern void ext4_dirty_inode(struct inode *); | 1705 | extern void ext4_dirty_inode(struct inode *); |
1577 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1706 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
@@ -1584,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *); | |||
1584 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1713 | extern int ext4_alloc_da_blocks(struct inode *inode); |
1585 | extern void ext4_set_aops(struct inode *inode); | 1714 | extern void ext4_set_aops(struct inode *inode); |
1586 | extern int ext4_writepage_trans_blocks(struct inode *); | 1715 | extern int ext4_writepage_trans_blocks(struct inode *); |
1587 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1588 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 1716 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
1589 | extern int ext4_block_truncate_page(handle_t *handle, | 1717 | extern int ext4_block_truncate_page(handle_t *handle, |
1590 | struct address_space *mapping, loff_t from); | 1718 | struct address_space *mapping, loff_t from); |
1591 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1719 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1592 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1720 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1593 | extern int flush_completed_IO(struct inode *inode); | ||
1594 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1721 | extern void ext4_da_update_reserve_space(struct inode *inode, |
1595 | int used, int quota_claim); | 1722 | int used, int quota_claim); |
1596 | /* ioctl.c */ | 1723 | /* ioctl.c */ |
@@ -1601,8 +1728,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); | |||
1601 | extern int ext4_ext_migrate(struct inode *); | 1728 | extern int ext4_ext_migrate(struct inode *); |
1602 | 1729 | ||
1603 | /* namei.c */ | 1730 | /* namei.c */ |
1604 | extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize); | ||
1605 | extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize); | ||
1606 | extern int ext4_orphan_add(handle_t *, struct inode *); | 1731 | extern int ext4_orphan_add(handle_t *, struct inode *); |
1607 | extern int ext4_orphan_del(handle_t *, struct inode *); | 1732 | extern int ext4_orphan_del(handle_t *, struct inode *); |
1608 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | 1733 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, |
@@ -1616,25 +1741,38 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1616 | ext4_fsblk_t n_blocks_count); | 1741 | ext4_fsblk_t n_blocks_count); |
1617 | 1742 | ||
1618 | /* super.c */ | 1743 | /* super.c */ |
1619 | extern void __ext4_error(struct super_block *, const char *, const char *, ...) | 1744 | extern void __ext4_error(struct super_block *, const char *, unsigned int, |
1620 | __attribute__ ((format (printf, 3, 4))); | 1745 | const char *, ...) |
1621 | #define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) | 1746 | __attribute__ ((format (printf, 4, 5))); |
1622 | extern void ext4_error_inode(const char *, struct inode *, const char *, ...) | 1747 | #define ext4_error(sb, message...) __ext4_error(sb, __func__, \ |
1623 | __attribute__ ((format (printf, 3, 4))); | 1748 | __LINE__, ## message) |
1624 | extern void ext4_error_file(const char *, struct file *, const char *, ...) | 1749 | extern void ext4_error_inode(struct inode *, const char *, unsigned int, |
1625 | __attribute__ ((format (printf, 3, 4))); | 1750 | ext4_fsblk_t, const char *, ...) |
1626 | extern void __ext4_std_error(struct super_block *, const char *, int); | 1751 | __attribute__ ((format (printf, 5, 6))); |
1627 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) | 1752 | extern void ext4_error_file(struct file *, const char *, unsigned int, |
1628 | __attribute__ ((format (printf, 3, 4))); | 1753 | const char *, ...) |
1629 | extern void __ext4_warning(struct super_block *, const char *, | 1754 | __attribute__ ((format (printf, 4, 5))); |
1755 | extern void __ext4_std_error(struct super_block *, const char *, | ||
1756 | unsigned int, int); | ||
1757 | extern void __ext4_abort(struct super_block *, const char *, unsigned int, | ||
1758 | const char *, ...) | ||
1759 | __attribute__ ((format (printf, 4, 5))); | ||
1760 | #define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \ | ||
1761 | __LINE__, ## message) | ||
1762 | extern void __ext4_warning(struct super_block *, const char *, unsigned int, | ||
1630 | const char *, ...) | 1763 | const char *, ...) |
1631 | __attribute__ ((format (printf, 3, 4))); | 1764 | __attribute__ ((format (printf, 4, 5))); |
1632 | #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) | 1765 | #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \ |
1766 | __LINE__, ## message) | ||
1633 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) | 1767 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) |
1634 | __attribute__ ((format (printf, 3, 4))); | 1768 | __attribute__ ((format (printf, 3, 4))); |
1635 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, | 1769 | extern void __ext4_grp_locked_error(const char *, unsigned int, \ |
1636 | const char *, const char *, ...) | 1770 | struct super_block *, ext4_group_t, \ |
1637 | __attribute__ ((format (printf, 4, 5))); | 1771 | unsigned long, ext4_fsblk_t, \ |
1772 | const char *, ...) | ||
1773 | __attribute__ ((format (printf, 7, 8))); | ||
1774 | #define ext4_grp_locked_error(sb, grp, message...) \ | ||
1775 | __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message) | ||
1638 | extern void ext4_update_dynamic_rev(struct super_block *sb); | 1776 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
1639 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1777 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
1640 | __u32 compat); | 1778 | __u32 compat); |
@@ -1768,7 +1906,7 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi) | |||
1768 | #define ext4_std_error(sb, errno) \ | 1906 | #define ext4_std_error(sb, errno) \ |
1769 | do { \ | 1907 | do { \ |
1770 | if ((errno)) \ | 1908 | if ((errno)) \ |
1771 | __ext4_std_error((sb), __func__, (errno)); \ | 1909 | __ext4_std_error((sb), __func__, __LINE__, (errno)); \ |
1772 | } while (0) | 1910 | } while (0) |
1773 | 1911 | ||
1774 | #ifdef CONFIG_SMP | 1912 | #ifdef CONFIG_SMP |
@@ -1860,6 +1998,12 @@ static inline void ext4_unlock_group(struct super_block *sb, | |||
1860 | spin_unlock(ext4_group_lock_ptr(sb, group)); | 1998 | spin_unlock(ext4_group_lock_ptr(sb, group)); |
1861 | } | 1999 | } |
1862 | 2000 | ||
2001 | static inline void ext4_mark_super_dirty(struct super_block *sb) | ||
2002 | { | ||
2003 | if (EXT4_SB(sb)->s_journal == NULL) | ||
2004 | sb->s_dirt =1; | ||
2005 | } | ||
2006 | |||
1863 | /* | 2007 | /* |
1864 | * Inodes and files operations | 2008 | * Inodes and files operations |
1865 | */ | 2009 | */ |
@@ -1870,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations; | |||
1870 | /* file.c */ | 2014 | /* file.c */ |
1871 | extern const struct inode_operations ext4_file_inode_operations; | 2015 | extern const struct inode_operations ext4_file_inode_operations; |
1872 | extern const struct file_operations ext4_file_operations; | 2016 | extern const struct file_operations ext4_file_operations; |
2017 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | ||
1873 | 2018 | ||
1874 | /* namei.c */ | 2019 | /* namei.c */ |
1875 | extern const struct inode_operations ext4_dir_inode_operations; | 2020 | extern const struct inode_operations ext4_dir_inode_operations; |
@@ -1883,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1883 | /* block_validity */ | 2028 | /* block_validity */ |
1884 | extern void ext4_release_system_zone(struct super_block *sb); | 2029 | extern void ext4_release_system_zone(struct super_block *sb); |
1885 | extern int ext4_setup_system_zone(struct super_block *sb); | 2030 | extern int ext4_setup_system_zone(struct super_block *sb); |
1886 | extern int __init init_ext4_system_zone(void); | 2031 | extern int __init ext4_init_system_zone(void); |
1887 | extern void exit_ext4_system_zone(void); | 2032 | extern void ext4_exit_system_zone(void); |
1888 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | 2033 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, |
1889 | ext4_fsblk_t start_blk, | 2034 | ext4_fsblk_t start_blk, |
1890 | unsigned int count); | 2035 | unsigned int count); |
@@ -1905,9 +2050,6 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
1905 | ssize_t len); | 2050 | ssize_t len); |
1906 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | 2051 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, |
1907 | struct ext4_map_blocks *map, int flags); | 2052 | struct ext4_map_blocks *map, int flags); |
1908 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, | ||
1909 | sector_t block, unsigned int max_blocks, | ||
1910 | struct buffer_head *bh, int flags); | ||
1911 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2053 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1912 | __u64 start, __u64 len); | 2054 | __u64 start, __u64 len); |
1913 | /* move_extent.c */ | 2055 | /* move_extent.c */ |
@@ -1915,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1915 | __u64 start_orig, __u64 start_donor, | 2057 | __u64 start_orig, __u64 start_donor, |
1916 | __u64 len, __u64 *moved_len); | 2058 | __u64 len, __u64 *moved_len); |
1917 | 2059 | ||
2060 | /* page-io.c */ | ||
2061 | extern int __init ext4_init_pageio(void); | ||
2062 | extern void ext4_exit_pageio(void); | ||
2063 | extern void ext4_free_io_end(ext4_io_end_t *io); | ||
2064 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | ||
2065 | extern int ext4_end_io_nolock(ext4_io_end_t *io); | ||
2066 | extern void ext4_io_submit(struct ext4_io_submit *io); | ||
2067 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | ||
2068 | struct page *page, | ||
2069 | int len, | ||
2070 | struct writeback_control *wbc); | ||
1918 | 2071 | ||
1919 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2072 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ |
1920 | enum ext4_state_bits { | 2073 | enum ext4_state_bits { |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index bdb6ce7e2eb4..28ce70fd9cd0 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) | |||
225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); | 225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); |
226 | } | 226 | } |
227 | 227 | ||
228 | /* | ||
229 | * ext4_ext_pblock: | ||
230 | * combine low and high parts of physical block number into ext4_fsblk_t | ||
231 | */ | ||
232 | static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex) | ||
233 | { | ||
234 | ext4_fsblk_t block; | ||
235 | |||
236 | block = le32_to_cpu(ex->ee_start_lo); | ||
237 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
238 | return block; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * ext4_idx_pblock: | ||
243 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
244 | */ | ||
245 | static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix) | ||
246 | { | ||
247 | ext4_fsblk_t block; | ||
248 | |||
249 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
250 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
251 | return block; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * ext4_ext_store_pblock: | ||
256 | * stores a large physical block number into an extent struct, | ||
257 | * breaking it into parts | ||
258 | */ | ||
259 | static inline void ext4_ext_store_pblock(struct ext4_extent *ex, | ||
260 | ext4_fsblk_t pb) | ||
261 | { | ||
262 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
263 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
264 | 0xffff); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * ext4_idx_store_pblock: | ||
269 | * stores a large physical block number into an index struct, | ||
270 | * breaking it into parts | ||
271 | */ | ||
272 | static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, | ||
273 | ext4_fsblk_t pb) | ||
274 | { | ||
275 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
276 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
277 | 0xffff); | ||
278 | } | ||
279 | |||
228 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 280 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
229 | sector_t lblocks); | 281 | sector_t lblocks); |
230 | extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); | ||
231 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | ||
232 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | ||
233 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 282 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
234 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | 283 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
235 | int num, | 284 | int num, |
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | |||
237 | extern int ext4_can_extents_be_merged(struct inode *inode, | 286 | extern int ext4_can_extents_be_merged(struct inode *inode, |
238 | struct ext4_extent *ex1, | 287 | struct ext4_extent *ex1, |
239 | struct ext4_extent *ex2); | 288 | struct ext4_extent *ex2); |
240 | extern int ext4_ext_try_to_merge(struct inode *inode, | ||
241 | struct ext4_ext_path *path, | ||
242 | struct ext4_extent *); | ||
243 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | ||
244 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); | 289 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); |
245 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
246 | ext_prepare_callback, void *); | ||
247 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 290 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
248 | struct ext4_ext_path *); | 291 | struct ext4_ext_path *); |
249 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | ||
250 | ext4_lblk_t *, ext4_fsblk_t *); | ||
251 | extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, | ||
252 | ext4_lblk_t *, ext4_fsblk_t *); | ||
253 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 292 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
254 | extern int ext4_ext_check_inode(struct inode *inode); | 293 | extern int ext4_ext_check_inode(struct inode *inode); |
255 | #endif /* _EXT4_EXTENTS */ | 294 | #endif /* _EXT4_EXTENTS */ |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 53d2764d71ca..6e272ef6ba96 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -6,29 +6,29 @@ | |||
6 | 6 | ||
7 | #include <trace/events/ext4.h> | 7 | #include <trace/events/ext4.h> |
8 | 8 | ||
9 | int __ext4_journal_get_undo_access(const char *where, handle_t *handle, | 9 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, |
10 | struct buffer_head *bh) | 10 | handle_t *handle, struct buffer_head *bh) |
11 | { | 11 | { |
12 | int err = 0; | 12 | int err = 0; |
13 | 13 | ||
14 | if (ext4_handle_valid(handle)) { | 14 | if (ext4_handle_valid(handle)) { |
15 | err = jbd2_journal_get_undo_access(handle, bh); | 15 | err = jbd2_journal_get_undo_access(handle, bh); |
16 | if (err) | 16 | if (err) |
17 | ext4_journal_abort_handle(where, __func__, bh, | 17 | ext4_journal_abort_handle(where, line, __func__, bh, |
18 | handle, err); | 18 | handle, err); |
19 | } | 19 | } |
20 | return err; | 20 | return err; |
21 | } | 21 | } |
22 | 22 | ||
23 | int __ext4_journal_get_write_access(const char *where, handle_t *handle, | 23 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
24 | struct buffer_head *bh) | 24 | handle_t *handle, struct buffer_head *bh) |
25 | { | 25 | { |
26 | int err = 0; | 26 | int err = 0; |
27 | 27 | ||
28 | if (ext4_handle_valid(handle)) { | 28 | if (ext4_handle_valid(handle)) { |
29 | err = jbd2_journal_get_write_access(handle, bh); | 29 | err = jbd2_journal_get_write_access(handle, bh); |
30 | if (err) | 30 | if (err) |
31 | ext4_journal_abort_handle(where, __func__, bh, | 31 | ext4_journal_abort_handle(where, line, __func__, bh, |
32 | handle, err); | 32 | handle, err); |
33 | } | 33 | } |
34 | return err; | 34 | return err; |
@@ -46,9 +46,9 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, | |||
46 | * If the handle isn't valid we're not journaling, but we still need to | 46 | * If the handle isn't valid we're not journaling, but we still need to |
47 | * call into ext4_journal_revoke() to put the buffer head. | 47 | * call into ext4_journal_revoke() to put the buffer head. |
48 | */ | 48 | */ |
49 | int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | 49 | int __ext4_forget(const char *where, unsigned int line, handle_t *handle, |
50 | struct inode *inode, struct buffer_head *bh, | 50 | int is_metadata, struct inode *inode, |
51 | ext4_fsblk_t blocknr) | 51 | struct buffer_head *bh, ext4_fsblk_t blocknr) |
52 | { | 52 | { |
53 | int err; | 53 | int err; |
54 | 54 | ||
@@ -79,8 +79,8 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | |||
79 | BUFFER_TRACE(bh, "call jbd2_journal_forget"); | 79 | BUFFER_TRACE(bh, "call jbd2_journal_forget"); |
80 | err = jbd2_journal_forget(handle, bh); | 80 | err = jbd2_journal_forget(handle, bh); |
81 | if (err) | 81 | if (err) |
82 | ext4_journal_abort_handle(where, __func__, bh, | 82 | ext4_journal_abort_handle(where, line, __func__, |
83 | handle, err); | 83 | bh, handle, err); |
84 | return err; | 84 | return err; |
85 | } | 85 | } |
86 | return 0; | 86 | return 0; |
@@ -92,15 +92,16 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | |||
92 | BUFFER_TRACE(bh, "call jbd2_journal_revoke"); | 92 | BUFFER_TRACE(bh, "call jbd2_journal_revoke"); |
93 | err = jbd2_journal_revoke(handle, blocknr, bh); | 93 | err = jbd2_journal_revoke(handle, blocknr, bh); |
94 | if (err) { | 94 | if (err) { |
95 | ext4_journal_abort_handle(where, __func__, bh, handle, err); | 95 | ext4_journal_abort_handle(where, line, __func__, |
96 | ext4_abort(inode->i_sb, __func__, | 96 | bh, handle, err); |
97 | __ext4_abort(inode->i_sb, where, line, | ||
97 | "error %d when attempting revoke", err); | 98 | "error %d when attempting revoke", err); |
98 | } | 99 | } |
99 | BUFFER_TRACE(bh, "exit"); | 100 | BUFFER_TRACE(bh, "exit"); |
100 | return err; | 101 | return err; |
101 | } | 102 | } |
102 | 103 | ||
103 | int __ext4_journal_get_create_access(const char *where, | 104 | int __ext4_journal_get_create_access(const char *where, unsigned int line, |
104 | handle_t *handle, struct buffer_head *bh) | 105 | handle_t *handle, struct buffer_head *bh) |
105 | { | 106 | { |
106 | int err = 0; | 107 | int err = 0; |
@@ -108,22 +109,23 @@ int __ext4_journal_get_create_access(const char *where, | |||
108 | if (ext4_handle_valid(handle)) { | 109 | if (ext4_handle_valid(handle)) { |
109 | err = jbd2_journal_get_create_access(handle, bh); | 110 | err = jbd2_journal_get_create_access(handle, bh); |
110 | if (err) | 111 | if (err) |
111 | ext4_journal_abort_handle(where, __func__, bh, | 112 | ext4_journal_abort_handle(where, line, __func__, |
112 | handle, err); | 113 | bh, handle, err); |
113 | } | 114 | } |
114 | return err; | 115 | return err; |
115 | } | 116 | } |
116 | 117 | ||
117 | int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | 118 | int __ext4_handle_dirty_metadata(const char *where, unsigned int line, |
118 | struct inode *inode, struct buffer_head *bh) | 119 | handle_t *handle, struct inode *inode, |
120 | struct buffer_head *bh) | ||
119 | { | 121 | { |
120 | int err = 0; | 122 | int err = 0; |
121 | 123 | ||
122 | if (ext4_handle_valid(handle)) { | 124 | if (ext4_handle_valid(handle)) { |
123 | err = jbd2_journal_dirty_metadata(handle, bh); | 125 | err = jbd2_journal_dirty_metadata(handle, bh); |
124 | if (err) | 126 | if (err) |
125 | ext4_journal_abort_handle(where, __func__, bh, | 127 | ext4_journal_abort_handle(where, line, __func__, |
126 | handle, err); | 128 | bh, handle, err); |
127 | } else { | 129 | } else { |
128 | if (inode) | 130 | if (inode) |
129 | mark_buffer_dirty_inode(bh, inode); | 131 | mark_buffer_dirty_inode(bh, inode); |
@@ -132,14 +134,33 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
132 | if (inode && inode_needs_sync(inode)) { | 134 | if (inode && inode_needs_sync(inode)) { |
133 | sync_dirty_buffer(bh); | 135 | sync_dirty_buffer(bh); |
134 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 136 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
135 | ext4_error(inode->i_sb, | 137 | struct ext4_super_block *es; |
136 | "IO error syncing inode, " | 138 | |
137 | "inode=%lu, block=%llu", | 139 | es = EXT4_SB(inode->i_sb)->s_es; |
138 | inode->i_ino, | 140 | es->s_last_error_block = |
139 | (unsigned long long) bh->b_blocknr); | 141 | cpu_to_le64(bh->b_blocknr); |
142 | ext4_error_inode(inode, where, line, | ||
143 | bh->b_blocknr, | ||
144 | "IO error syncing itable block"); | ||
140 | err = -EIO; | 145 | err = -EIO; |
141 | } | 146 | } |
142 | } | 147 | } |
143 | } | 148 | } |
144 | return err; | 149 | return err; |
145 | } | 150 | } |
151 | |||
152 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | ||
153 | handle_t *handle, struct super_block *sb) | ||
154 | { | ||
155 | struct buffer_head *bh = EXT4_SB(sb)->s_sbh; | ||
156 | int err = 0; | ||
157 | |||
158 | if (ext4_handle_valid(handle)) { | ||
159 | err = jbd2_journal_dirty_metadata(handle, bh); | ||
160 | if (err) | ||
161 | ext4_journal_abort_handle(where, line, __func__, | ||
162 | bh, handle, err); | ||
163 | } else | ||
164 | sb->s_dirt = 1; | ||
165 | return err; | ||
166 | } | ||
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index dade0c024797..b0bd792c58c5 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -122,39 +122,47 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); | |||
122 | /* | 122 | /* |
123 | * Wrapper functions with which ext4 calls into JBD. | 123 | * Wrapper functions with which ext4 calls into JBD. |
124 | */ | 124 | */ |
125 | void ext4_journal_abort_handle(const char *caller, const char *err_fn, | 125 | void ext4_journal_abort_handle(const char *caller, unsigned int line, |
126 | const char *err_fn, | ||
126 | struct buffer_head *bh, handle_t *handle, int err); | 127 | struct buffer_head *bh, handle_t *handle, int err); |
127 | 128 | ||
128 | int __ext4_journal_get_undo_access(const char *where, handle_t *handle, | 129 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, |
129 | struct buffer_head *bh); | 130 | handle_t *handle, struct buffer_head *bh); |
130 | 131 | ||
131 | int __ext4_journal_get_write_access(const char *where, handle_t *handle, | 132 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
132 | struct buffer_head *bh); | 133 | handle_t *handle, struct buffer_head *bh); |
133 | 134 | ||
134 | int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | 135 | int __ext4_forget(const char *where, unsigned int line, handle_t *handle, |
135 | struct inode *inode, struct buffer_head *bh, | 136 | int is_metadata, struct inode *inode, |
136 | ext4_fsblk_t blocknr); | 137 | struct buffer_head *bh, ext4_fsblk_t blocknr); |
137 | 138 | ||
138 | int __ext4_journal_get_create_access(const char *where, | 139 | int __ext4_journal_get_create_access(const char *where, unsigned int line, |
139 | handle_t *handle, struct buffer_head *bh); | 140 | handle_t *handle, struct buffer_head *bh); |
140 | 141 | ||
141 | int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | 142 | int __ext4_handle_dirty_metadata(const char *where, unsigned int line, |
142 | struct inode *inode, struct buffer_head *bh); | 143 | handle_t *handle, struct inode *inode, |
144 | struct buffer_head *bh); | ||
145 | |||
146 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | ||
147 | handle_t *handle, struct super_block *sb); | ||
143 | 148 | ||
144 | #define ext4_journal_get_undo_access(handle, bh) \ | 149 | #define ext4_journal_get_undo_access(handle, bh) \ |
145 | __ext4_journal_get_undo_access(__func__, (handle), (bh)) | 150 | __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh)) |
146 | #define ext4_journal_get_write_access(handle, bh) \ | 151 | #define ext4_journal_get_write_access(handle, bh) \ |
147 | __ext4_journal_get_write_access(__func__, (handle), (bh)) | 152 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) |
148 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ | 153 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ |
149 | __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ | 154 | __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \ |
150 | (block_nr)) | 155 | (bh), (block_nr)) |
151 | #define ext4_journal_get_create_access(handle, bh) \ | 156 | #define ext4_journal_get_create_access(handle, bh) \ |
152 | __ext4_journal_get_create_access(__func__, (handle), (bh)) | 157 | __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh)) |
153 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ | 158 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ |
154 | __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) | 159 | __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ |
160 | (bh)) | ||
161 | #define ext4_handle_dirty_super(handle, sb) \ | ||
162 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) | ||
155 | 163 | ||
156 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); | 164 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); |
157 | int __ext4_journal_stop(const char *where, handle_t *handle); | 165 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); |
158 | 166 | ||
159 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) | 167 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) |
160 | 168 | ||
@@ -207,7 +215,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) | |||
207 | } | 215 | } |
208 | 216 | ||
209 | #define ext4_journal_stop(handle) \ | 217 | #define ext4_journal_stop(handle) \ |
210 | __ext4_journal_stop(__func__, (handle)) | 218 | __ext4_journal_stop(__func__, __LINE__, (handle)) |
211 | 219 | ||
212 | static inline handle_t *ext4_journal_current_handle(void) | 220 | static inline handle_t *ext4_journal_current_handle(void) |
213 | { | 221 | { |
@@ -308,17 +316,15 @@ static inline int ext4_should_writeback_data(struct inode *inode) | |||
308 | * This function controls whether or not we should try to go down the | 316 | * This function controls whether or not we should try to go down the |
309 | * dioread_nolock code paths, which makes it safe to avoid taking | 317 | * dioread_nolock code paths, which makes it safe to avoid taking |
310 | * i_mutex for direct I/O reads. This only works for extent-based | 318 | * i_mutex for direct I/O reads. This only works for extent-based |
311 | * files, and it doesn't work for nobh or if data journaling is | 319 | * files, and it doesn't work if data journaling is enabled, since the |
312 | * enabled, since the dioread_nolock code uses b_private to pass | 320 | * dioread_nolock code uses b_private to pass information back to the |
313 | * information back to the I/O completion handler, and this conflicts | 321 | * I/O completion handler, and this conflicts with the jbd's use of |
314 | * with the jbd's use of b_private. | 322 | * b_private. |
315 | */ | 323 | */ |
316 | static inline int ext4_should_dioread_nolock(struct inode *inode) | 324 | static inline int ext4_should_dioread_nolock(struct inode *inode) |
317 | { | 325 | { |
318 | if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) | 326 | if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) |
319 | return 0; | 327 | return 0; |
320 | if (test_opt(inode->i_sb, NOBH)) | ||
321 | return 0; | ||
322 | if (!S_ISREG(inode->i_mode)) | 328 | if (!S_ISREG(inode->i_mode)) |
323 | return 0; | 329 | return 0; |
324 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 330 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 377309c1af65..0554c48cb1fd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -44,55 +44,6 @@ | |||
44 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
45 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
46 | 46 | ||
47 | |||
48 | /* | ||
49 | * ext_pblock: | ||
50 | * combine low and high parts of physical block number into ext4_fsblk_t | ||
51 | */ | ||
52 | ext4_fsblk_t ext_pblock(struct ext4_extent *ex) | ||
53 | { | ||
54 | ext4_fsblk_t block; | ||
55 | |||
56 | block = le32_to_cpu(ex->ee_start_lo); | ||
57 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
58 | return block; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * idx_pblock: | ||
63 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
64 | */ | ||
65 | ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) | ||
66 | { | ||
67 | ext4_fsblk_t block; | ||
68 | |||
69 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
70 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
71 | return block; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * ext4_ext_store_pblock: | ||
76 | * stores a large physical block number into an extent struct, | ||
77 | * breaking it into parts | ||
78 | */ | ||
79 | void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) | ||
80 | { | ||
81 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
82 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * ext4_idx_store_pblock: | ||
87 | * stores a large physical block number into an index struct, | ||
88 | * breaking it into parts | ||
89 | */ | ||
90 | static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | ||
91 | { | ||
92 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
94 | } | ||
95 | |||
96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 47 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
97 | struct inode *inode, | 48 | struct inode *inode, |
98 | int needed) | 49 | int needed) |
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
169 | /* try to predict block placement */ | 120 | /* try to predict block placement */ |
170 | ex = path[depth].p_ext; | 121 | ex = path[depth].p_ext; |
171 | if (ex) | 122 | if (ex) |
172 | return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); | 123 | return (ext4_ext_pblock(ex) + |
124 | (block - le32_to_cpu(ex->ee_block))); | ||
173 | 125 | ||
174 | /* it looks like index is empty; | 126 | /* it looks like index is empty; |
175 | * try to find starting block from index itself */ | 127 | * try to find starting block from index itself */ |
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
354 | 306 | ||
355 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 307 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
356 | { | 308 | { |
357 | ext4_fsblk_t block = ext_pblock(ext); | 309 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
358 | int len = ext4_ext_get_actual_len(ext); | 310 | int len = ext4_ext_get_actual_len(ext); |
359 | 311 | ||
360 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 312 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
363 | static int ext4_valid_extent_idx(struct inode *inode, | 315 | static int ext4_valid_extent_idx(struct inode *inode, |
364 | struct ext4_extent_idx *ext_idx) | 316 | struct ext4_extent_idx *ext_idx) |
365 | { | 317 | { |
366 | ext4_fsblk_t block = idx_pblock(ext_idx); | 318 | ext4_fsblk_t block = ext4_idx_pblock(ext_idx); |
367 | 319 | ||
368 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); | 320 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); |
369 | } | 321 | } |
@@ -401,9 +353,9 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
401 | return 1; | 353 | return 1; |
402 | } | 354 | } |
403 | 355 | ||
404 | static int __ext4_ext_check(const char *function, struct inode *inode, | 356 | static int __ext4_ext_check(const char *function, unsigned int line, |
405 | struct ext4_extent_header *eh, | 357 | struct inode *inode, struct ext4_extent_header *eh, |
406 | int depth) | 358 | int depth) |
407 | { | 359 | { |
408 | const char *error_msg; | 360 | const char *error_msg; |
409 | int max = 0; | 361 | int max = 0; |
@@ -436,7 +388,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode, | |||
436 | return 0; | 388 | return 0; |
437 | 389 | ||
438 | corrupted: | 390 | corrupted: |
439 | ext4_error_inode(function, inode, | 391 | ext4_error_inode(inode, function, line, 0, |
440 | "bad header/extent: %s - magic %x, " | 392 | "bad header/extent: %s - magic %x, " |
441 | "entries %u, max %u(%u), depth %u(%u)", | 393 | "entries %u, max %u(%u), depth %u(%u)", |
442 | error_msg, le16_to_cpu(eh->eh_magic), | 394 | error_msg, le16_to_cpu(eh->eh_magic), |
@@ -447,7 +399,7 @@ corrupted: | |||
447 | } | 399 | } |
448 | 400 | ||
449 | #define ext4_ext_check(inode, eh, depth) \ | 401 | #define ext4_ext_check(inode, eh, depth) \ |
450 | __ext4_ext_check(__func__, inode, eh, depth) | 402 | __ext4_ext_check(__func__, __LINE__, inode, eh, depth) |
451 | 403 | ||
452 | int ext4_ext_check_inode(struct inode *inode) | 404 | int ext4_ext_check_inode(struct inode *inode) |
453 | { | 405 | { |
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
463 | for (k = 0; k <= l; k++, path++) { | 415 | for (k = 0; k <= l; k++, path++) { |
464 | if (path->p_idx) { | 416 | if (path->p_idx) { |
465 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 417 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
466 | idx_pblock(path->p_idx)); | 418 | ext4_idx_pblock(path->p_idx)); |
467 | } else if (path->p_ext) { | 419 | } else if (path->p_ext) { |
468 | ext_debug(" %d:[%d]%d:%llu ", | 420 | ext_debug(" %d:[%d]%d:%llu ", |
469 | le32_to_cpu(path->p_ext->ee_block), | 421 | le32_to_cpu(path->p_ext->ee_block), |
470 | ext4_ext_is_uninitialized(path->p_ext), | 422 | ext4_ext_is_uninitialized(path->p_ext), |
471 | ext4_ext_get_actual_len(path->p_ext), | 423 | ext4_ext_get_actual_len(path->p_ext), |
472 | ext_pblock(path->p_ext)); | 424 | ext4_ext_pblock(path->p_ext)); |
473 | } else | 425 | } else |
474 | ext_debug(" []"); | 426 | ext_debug(" []"); |
475 | } | 427 | } |
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
494 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 446 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
495 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), | 447 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
496 | ext4_ext_is_uninitialized(ex), | 448 | ext4_ext_is_uninitialized(ex), |
497 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 449 | ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); |
498 | } | 450 | } |
499 | ext_debug("\n"); | 451 | ext_debug("\n"); |
500 | } | 452 | } |
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
545 | 497 | ||
546 | path->p_idx = l - 1; | 498 | path->p_idx = l - 1; |
547 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), | 499 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), |
548 | idx_pblock(path->p_idx)); | 500 | ext4_idx_pblock(path->p_idx)); |
549 | 501 | ||
550 | #ifdef CHECK_BINSEARCH | 502 | #ifdef CHECK_BINSEARCH |
551 | { | 503 | { |
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode, | |||
614 | path->p_ext = l - 1; | 566 | path->p_ext = l - 1; |
615 | ext_debug(" -> %d:%llu:[%d]%d ", | 567 | ext_debug(" -> %d:%llu:[%d]%d ", |
616 | le32_to_cpu(path->p_ext->ee_block), | 568 | le32_to_cpu(path->p_ext->ee_block), |
617 | ext_pblock(path->p_ext), | 569 | ext4_ext_pblock(path->p_ext), |
618 | ext4_ext_is_uninitialized(path->p_ext), | 570 | ext4_ext_is_uninitialized(path->p_ext), |
619 | ext4_ext_get_actual_len(path->p_ext)); | 571 | ext4_ext_get_actual_len(path->p_ext)); |
620 | 572 | ||
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
682 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); | 634 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); |
683 | 635 | ||
684 | ext4_ext_binsearch_idx(inode, path + ppos, block); | 636 | ext4_ext_binsearch_idx(inode, path + ppos, block); |
685 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 637 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
686 | path[ppos].p_depth = i; | 638 | path[ppos].p_depth = i; |
687 | path[ppos].p_ext = NULL; | 639 | path[ppos].p_ext = NULL; |
688 | 640 | ||
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
721 | ext4_ext_binsearch(inode, path + ppos, block); | 673 | ext4_ext_binsearch(inode, path + ppos, block); |
722 | /* if not an empty leaf */ | 674 | /* if not an empty leaf */ |
723 | if (path[ppos].p_ext) | 675 | if (path[ppos].p_ext) |
724 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 676 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
725 | 677 | ||
726 | ext4_ext_show_path(inode, path); | 678 | ext4_ext_show_path(inode, path); |
727 | 679 | ||
@@ -739,9 +691,9 @@ err: | |||
739 | * insert new index [@logical;@ptr] into the block at @curp; | 691 | * insert new index [@logical;@ptr] into the block at @curp; |
740 | * check where to insert: before @curp or after @curp | 692 | * check where to insert: before @curp or after @curp |
741 | */ | 693 | */ |
742 | int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | 694 | static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, |
743 | struct ext4_ext_path *curp, | 695 | struct ext4_ext_path *curp, |
744 | int logical, ext4_fsblk_t ptr) | 696 | int logical, ext4_fsblk_t ptr) |
745 | { | 697 | { |
746 | struct ext4_extent_idx *ix; | 698 | struct ext4_extent_idx *ix; |
747 | int len, err; | 699 | int len, err; |
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
917 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | 869 | EXT_MAX_EXTENT(path[depth].p_hdr)) { |
918 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", | 870 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", |
919 | le32_to_cpu(path[depth].p_ext->ee_block), | 871 | le32_to_cpu(path[depth].p_ext->ee_block), |
920 | ext_pblock(path[depth].p_ext), | 872 | ext4_ext_pblock(path[depth].p_ext), |
921 | ext4_ext_is_uninitialized(path[depth].p_ext), | 873 | ext4_ext_is_uninitialized(path[depth].p_ext), |
922 | ext4_ext_get_actual_len(path[depth].p_ext), | 874 | ext4_ext_get_actual_len(path[depth].p_ext), |
923 | newblock); | 875 | newblock); |
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
1007 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { | 959 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { |
1008 | ext_debug("%d: move %d:%llu in new index %llu\n", i, | 960 | ext_debug("%d: move %d:%llu in new index %llu\n", i, |
1009 | le32_to_cpu(path[i].p_idx->ei_block), | 961 | le32_to_cpu(path[i].p_idx->ei_block), |
1010 | idx_pblock(path[i].p_idx), | 962 | ext4_idx_pblock(path[i].p_idx), |
1011 | newblock); | 963 | newblock); |
1012 | /*memmove(++fidx, path[i].p_idx++, | 964 | /*memmove(++fidx, path[i].p_idx++, |
1013 | sizeof(struct ext4_extent_idx)); | 965 | sizeof(struct ext4_extent_idx)); |
@@ -1083,7 +1035,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1083 | { | 1035 | { |
1084 | struct ext4_ext_path *curp = path; | 1036 | struct ext4_ext_path *curp = path; |
1085 | struct ext4_extent_header *neh; | 1037 | struct ext4_extent_header *neh; |
1086 | struct ext4_extent_idx *fidx; | ||
1087 | struct buffer_head *bh; | 1038 | struct buffer_head *bh; |
1088 | ext4_fsblk_t newblock; | 1039 | ext4_fsblk_t newblock; |
1089 | int err = 0; | 1040 | int err = 0; |
@@ -1144,10 +1095,10 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1144 | ext4_idx_store_pblock(curp->p_idx, newblock); | 1095 | ext4_idx_store_pblock(curp->p_idx, newblock); |
1145 | 1096 | ||
1146 | neh = ext_inode_hdr(inode); | 1097 | neh = ext_inode_hdr(inode); |
1147 | fidx = EXT_FIRST_INDEX(neh); | ||
1148 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", | 1098 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", |
1149 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), | 1099 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), |
1150 | le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); | 1100 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1101 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); | ||
1151 | 1102 | ||
1152 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); | 1103 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); |
1153 | err = ext4_ext_dirty(handle, inode, curp); | 1104 | err = ext4_ext_dirty(handle, inode, curp); |
@@ -1233,9 +1184,9 @@ out: | |||
1233 | * returns 0 at @phys | 1184 | * returns 0 at @phys |
1234 | * return value contains 0 (success) or error code | 1185 | * return value contains 0 (success) or error code |
1235 | */ | 1186 | */ |
1236 | int | 1187 | static int ext4_ext_search_left(struct inode *inode, |
1237 | ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | 1188 | struct ext4_ext_path *path, |
1238 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1189 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1239 | { | 1190 | { |
1240 | struct ext4_extent_idx *ix; | 1191 | struct ext4_extent_idx *ix; |
1241 | struct ext4_extent *ex; | 1192 | struct ext4_extent *ex; |
@@ -1287,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1287 | } | 1238 | } |
1288 | 1239 | ||
1289 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; | 1240 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; |
1290 | *phys = ext_pblock(ex) + ee_len - 1; | 1241 | *phys = ext4_ext_pblock(ex) + ee_len - 1; |
1291 | return 0; | 1242 | return 0; |
1292 | } | 1243 | } |
1293 | 1244 | ||
@@ -1298,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1298 | * returns 0 at @phys | 1249 | * returns 0 at @phys |
1299 | * return value contains 0 (success) or error code | 1250 | * return value contains 0 (success) or error code |
1300 | */ | 1251 | */ |
1301 | int | 1252 | static int ext4_ext_search_right(struct inode *inode, |
1302 | ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | 1253 | struct ext4_ext_path *path, |
1303 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1254 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1304 | { | 1255 | { |
1305 | struct buffer_head *bh = NULL; | 1256 | struct buffer_head *bh = NULL; |
1306 | struct ext4_extent_header *eh; | 1257 | struct ext4_extent_header *eh; |
@@ -1343,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1343 | } | 1294 | } |
1344 | } | 1295 | } |
1345 | *logical = le32_to_cpu(ex->ee_block); | 1296 | *logical = le32_to_cpu(ex->ee_block); |
1346 | *phys = ext_pblock(ex); | 1297 | *phys = ext4_ext_pblock(ex); |
1347 | return 0; | 1298 | return 0; |
1348 | } | 1299 | } |
1349 | 1300 | ||
@@ -1358,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1358 | /* next allocated block in this leaf */ | 1309 | /* next allocated block in this leaf */ |
1359 | ex++; | 1310 | ex++; |
1360 | *logical = le32_to_cpu(ex->ee_block); | 1311 | *logical = le32_to_cpu(ex->ee_block); |
1361 | *phys = ext_pblock(ex); | 1312 | *phys = ext4_ext_pblock(ex); |
1362 | return 0; | 1313 | return 0; |
1363 | } | 1314 | } |
1364 | 1315 | ||
@@ -1377,7 +1328,7 @@ got_index: | |||
1377 | * follow it and find the closest allocated | 1328 | * follow it and find the closest allocated |
1378 | * block to the right */ | 1329 | * block to the right */ |
1379 | ix++; | 1330 | ix++; |
1380 | block = idx_pblock(ix); | 1331 | block = ext4_idx_pblock(ix); |
1381 | while (++depth < path->p_depth) { | 1332 | while (++depth < path->p_depth) { |
1382 | bh = sb_bread(inode->i_sb, block); | 1333 | bh = sb_bread(inode->i_sb, block); |
1383 | if (bh == NULL) | 1334 | if (bh == NULL) |
@@ -1389,7 +1340,7 @@ got_index: | |||
1389 | return -EIO; | 1340 | return -EIO; |
1390 | } | 1341 | } |
1391 | ix = EXT_FIRST_INDEX(eh); | 1342 | ix = EXT_FIRST_INDEX(eh); |
1392 | block = idx_pblock(ix); | 1343 | block = ext4_idx_pblock(ix); |
1393 | put_bh(bh); | 1344 | put_bh(bh); |
1394 | } | 1345 | } |
1395 | 1346 | ||
@@ -1403,7 +1354,7 @@ got_index: | |||
1403 | } | 1354 | } |
1404 | ex = EXT_FIRST_EXTENT(eh); | 1355 | ex = EXT_FIRST_EXTENT(eh); |
1405 | *logical = le32_to_cpu(ex->ee_block); | 1356 | *logical = le32_to_cpu(ex->ee_block); |
1406 | *phys = ext_pblock(ex); | 1357 | *phys = ext4_ext_pblock(ex); |
1407 | put_bh(bh); | 1358 | put_bh(bh); |
1408 | return 0; | 1359 | return 0; |
1409 | } | 1360 | } |
@@ -1574,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1574 | return 0; | 1525 | return 0; |
1575 | #endif | 1526 | #endif |
1576 | 1527 | ||
1577 | if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) | 1528 | if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) |
1578 | return 1; | 1529 | return 1; |
1579 | return 0; | 1530 | return 0; |
1580 | } | 1531 | } |
@@ -1586,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1586 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns | 1537 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns |
1587 | * 1 if they got merged. | 1538 | * 1 if they got merged. |
1588 | */ | 1539 | */ |
1589 | int ext4_ext_try_to_merge(struct inode *inode, | 1540 | static int ext4_ext_try_to_merge(struct inode *inode, |
1590 | struct ext4_ext_path *path, | 1541 | struct ext4_ext_path *path, |
1591 | struct ext4_extent *ex) | 1542 | struct ext4_extent *ex) |
1592 | { | 1543 | { |
1593 | struct ext4_extent_header *eh; | 1544 | struct ext4_extent_header *eh; |
1594 | unsigned int depth, len; | 1545 | unsigned int depth, len; |
@@ -1633,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1633 | * such that there will be no overlap, and then returns 1. | 1584 | * such that there will be no overlap, and then returns 1. |
1634 | * If there is no overlap found, it returns 0. | 1585 | * If there is no overlap found, it returns 0. |
1635 | */ | 1586 | */ |
1636 | unsigned int ext4_ext_check_overlap(struct inode *inode, | 1587 | static unsigned int ext4_ext_check_overlap(struct inode *inode, |
1637 | struct ext4_extent *newext, | 1588 | struct ext4_extent *newext, |
1638 | struct ext4_ext_path *path) | 1589 | struct ext4_ext_path *path) |
1639 | { | 1590 | { |
1640 | ext4_lblk_t b1, b2; | 1591 | ext4_lblk_t b1, b2; |
1641 | unsigned int depth, len1; | 1592 | unsigned int depth, len1; |
@@ -1707,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1707 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) | 1658 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) |
1708 | && ext4_can_extents_be_merged(inode, ex, newext)) { | 1659 | && ext4_can_extents_be_merged(inode, ex, newext)) { |
1709 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", | 1660 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1710 | ext4_ext_is_uninitialized(newext), | 1661 | ext4_ext_is_uninitialized(newext), |
1711 | ext4_ext_get_actual_len(newext), | 1662 | ext4_ext_get_actual_len(newext), |
1712 | le32_to_cpu(ex->ee_block), | 1663 | le32_to_cpu(ex->ee_block), |
1713 | ext4_ext_is_uninitialized(ex), | 1664 | ext4_ext_is_uninitialized(ex), |
1714 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1665 | ext4_ext_get_actual_len(ex), |
1666 | ext4_ext_pblock(ex)); | ||
1715 | err = ext4_ext_get_access(handle, inode, path + depth); | 1667 | err = ext4_ext_get_access(handle, inode, path + depth); |
1716 | if (err) | 1668 | if (err) |
1717 | return err; | 1669 | return err; |
@@ -1781,7 +1733,7 @@ has_space: | |||
1781 | /* there is no extent in this leaf, create first one */ | 1733 | /* there is no extent in this leaf, create first one */ |
1782 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", | 1734 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
1783 | le32_to_cpu(newext->ee_block), | 1735 | le32_to_cpu(newext->ee_block), |
1784 | ext_pblock(newext), | 1736 | ext4_ext_pblock(newext), |
1785 | ext4_ext_is_uninitialized(newext), | 1737 | ext4_ext_is_uninitialized(newext), |
1786 | ext4_ext_get_actual_len(newext)); | 1738 | ext4_ext_get_actual_len(newext)); |
1787 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1739 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
@@ -1795,7 +1747,7 @@ has_space: | |||
1795 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " | 1747 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
1796 | "move %d from 0x%p to 0x%p\n", | 1748 | "move %d from 0x%p to 0x%p\n", |
1797 | le32_to_cpu(newext->ee_block), | 1749 | le32_to_cpu(newext->ee_block), |
1798 | ext_pblock(newext), | 1750 | ext4_ext_pblock(newext), |
1799 | ext4_ext_is_uninitialized(newext), | 1751 | ext4_ext_is_uninitialized(newext), |
1800 | ext4_ext_get_actual_len(newext), | 1752 | ext4_ext_get_actual_len(newext), |
1801 | nearex, len, nearex + 1, nearex + 2); | 1753 | nearex, len, nearex + 1, nearex + 2); |
@@ -1809,7 +1761,7 @@ has_space: | |||
1809 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " | 1761 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
1810 | "move %d from 0x%p to 0x%p\n", | 1762 | "move %d from 0x%p to 0x%p\n", |
1811 | le32_to_cpu(newext->ee_block), | 1763 | le32_to_cpu(newext->ee_block), |
1812 | ext_pblock(newext), | 1764 | ext4_ext_pblock(newext), |
1813 | ext4_ext_is_uninitialized(newext), | 1765 | ext4_ext_is_uninitialized(newext), |
1814 | ext4_ext_get_actual_len(newext), | 1766 | ext4_ext_get_actual_len(newext), |
1815 | nearex, len, nearex + 1, nearex + 2); | 1767 | nearex, len, nearex + 1, nearex + 2); |
@@ -1820,7 +1772,7 @@ has_space: | |||
1820 | le16_add_cpu(&eh->eh_entries, 1); | 1772 | le16_add_cpu(&eh->eh_entries, 1); |
1821 | nearex = path[depth].p_ext; | 1773 | nearex = path[depth].p_ext; |
1822 | nearex->ee_block = newext->ee_block; | 1774 | nearex->ee_block = newext->ee_block; |
1823 | ext4_ext_store_pblock(nearex, ext_pblock(newext)); | 1775 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); |
1824 | nearex->ee_len = newext->ee_len; | 1776 | nearex->ee_len = newext->ee_len; |
1825 | 1777 | ||
1826 | merge: | 1778 | merge: |
@@ -1846,9 +1798,9 @@ cleanup: | |||
1846 | return err; | 1798 | return err; |
1847 | } | 1799 | } |
1848 | 1800 | ||
1849 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | 1801 | static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, |
1850 | ext4_lblk_t num, ext_prepare_callback func, | 1802 | ext4_lblk_t num, ext_prepare_callback func, |
1851 | void *cbdata) | 1803 | void *cbdata) |
1852 | { | 1804 | { |
1853 | struct ext4_ext_path *path = NULL; | 1805 | struct ext4_ext_path *path = NULL; |
1854 | struct ext4_ext_cache cbex; | 1806 | struct ext4_ext_cache cbex; |
@@ -1924,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1924 | } else { | 1876 | } else { |
1925 | cbex.ec_block = le32_to_cpu(ex->ee_block); | 1877 | cbex.ec_block = le32_to_cpu(ex->ee_block); |
1926 | cbex.ec_len = ext4_ext_get_actual_len(ex); | 1878 | cbex.ec_len = ext4_ext_get_actual_len(ex); |
1927 | cbex.ec_start = ext_pblock(ex); | 1879 | cbex.ec_start = ext4_ext_pblock(ex); |
1928 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | 1880 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; |
1929 | } | 1881 | } |
1930 | 1882 | ||
@@ -2074,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2074 | 2026 | ||
2075 | /* free index block */ | 2027 | /* free index block */ |
2076 | path--; | 2028 | path--; |
2077 | leaf = idx_pblock(path->p_idx); | 2029 | leaf = ext4_idx_pblock(path->p_idx); |
2078 | if (unlikely(path->p_hdr->eh_entries == 0)) { | 2030 | if (unlikely(path->p_hdr->eh_entries == 0)) { |
2079 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); | 2031 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); |
2080 | return -EIO; | 2032 | return -EIO; |
@@ -2182,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2182 | ext4_fsblk_t start; | 2134 | ext4_fsblk_t start; |
2183 | 2135 | ||
2184 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2136 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2185 | start = ext_pblock(ex) + ee_len - num; | 2137 | start = ext4_ext_pblock(ex) + ee_len - num; |
2186 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2138 | ext_debug("free last %u blocks starting %llu\n", num, start); |
2187 | ext4_free_blocks(handle, inode, 0, start, num, flags); | 2139 | ext4_free_blocks(handle, inode, 0, start, num, flags); |
2188 | } else if (from == le32_to_cpu(ex->ee_block) | 2140 | } else if (from == le32_to_cpu(ex->ee_block) |
@@ -2311,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2311 | goto out; | 2263 | goto out; |
2312 | 2264 | ||
2313 | ext_debug("new extent: %u:%u:%llu\n", block, num, | 2265 | ext_debug("new extent: %u:%u:%llu\n", block, num, |
2314 | ext_pblock(ex)); | 2266 | ext4_ext_pblock(ex)); |
2315 | ex--; | 2267 | ex--; |
2316 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2268 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2317 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2269 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2422,9 +2374,9 @@ again: | |||
2422 | struct buffer_head *bh; | 2374 | struct buffer_head *bh; |
2423 | /* go to the next level */ | 2375 | /* go to the next level */ |
2424 | ext_debug("move to level %d (block %llu)\n", | 2376 | ext_debug("move to level %d (block %llu)\n", |
2425 | i + 1, idx_pblock(path[i].p_idx)); | 2377 | i + 1, ext4_idx_pblock(path[i].p_idx)); |
2426 | memset(path + i + 1, 0, sizeof(*path)); | 2378 | memset(path + i + 1, 0, sizeof(*path)); |
2427 | bh = sb_bread(sb, idx_pblock(path[i].p_idx)); | 2379 | bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); |
2428 | if (!bh) { | 2380 | if (!bh) { |
2429 | /* should we reset i_size? */ | 2381 | /* should we reset i_size? */ |
2430 | err = -EIO; | 2382 | err = -EIO; |
@@ -2536,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb) | |||
2536 | #endif | 2488 | #endif |
2537 | } | 2489 | } |
2538 | 2490 | ||
2539 | static void bi_complete(struct bio *bio, int error) | ||
2540 | { | ||
2541 | complete((struct completion *)bio->bi_private); | ||
2542 | } | ||
2543 | |||
2544 | /* FIXME!! we need to try to merge to left or right after zero-out */ | 2491 | /* FIXME!! we need to try to merge to left or right after zero-out */ |
2545 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | 2492 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) |
2546 | { | 2493 | { |
2494 | ext4_fsblk_t ee_pblock; | ||
2495 | unsigned int ee_len; | ||
2547 | int ret; | 2496 | int ret; |
2548 | struct bio *bio; | ||
2549 | int blkbits, blocksize; | ||
2550 | sector_t ee_pblock; | ||
2551 | struct completion event; | ||
2552 | unsigned int ee_len, len, done, offset; | ||
2553 | 2497 | ||
2554 | |||
2555 | blkbits = inode->i_blkbits; | ||
2556 | blocksize = inode->i_sb->s_blocksize; | ||
2557 | ee_len = ext4_ext_get_actual_len(ex); | 2498 | ee_len = ext4_ext_get_actual_len(ex); |
2558 | ee_pblock = ext_pblock(ex); | 2499 | ee_pblock = ext4_ext_pblock(ex); |
2559 | |||
2560 | /* convert ee_pblock to 512 byte sectors */ | ||
2561 | ee_pblock = ee_pblock << (blkbits - 9); | ||
2562 | |||
2563 | while (ee_len > 0) { | ||
2564 | |||
2565 | if (ee_len > BIO_MAX_PAGES) | ||
2566 | len = BIO_MAX_PAGES; | ||
2567 | else | ||
2568 | len = ee_len; | ||
2569 | |||
2570 | bio = bio_alloc(GFP_NOIO, len); | ||
2571 | if (!bio) | ||
2572 | return -ENOMEM; | ||
2573 | |||
2574 | bio->bi_sector = ee_pblock; | ||
2575 | bio->bi_bdev = inode->i_sb->s_bdev; | ||
2576 | |||
2577 | done = 0; | ||
2578 | offset = 0; | ||
2579 | while (done < len) { | ||
2580 | ret = bio_add_page(bio, ZERO_PAGE(0), | ||
2581 | blocksize, offset); | ||
2582 | if (ret != blocksize) { | ||
2583 | /* | ||
2584 | * We can't add any more pages because of | ||
2585 | * hardware limitations. Start a new bio. | ||
2586 | */ | ||
2587 | break; | ||
2588 | } | ||
2589 | done++; | ||
2590 | offset += blocksize; | ||
2591 | if (offset >= PAGE_CACHE_SIZE) | ||
2592 | offset = 0; | ||
2593 | } | ||
2594 | 2500 | ||
2595 | init_completion(&event); | 2501 | ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); |
2596 | bio->bi_private = &event; | 2502 | if (ret > 0) |
2597 | bio->bi_end_io = bi_complete; | 2503 | ret = 0; |
2598 | submit_bio(WRITE, bio); | ||
2599 | wait_for_completion(&event); | ||
2600 | 2504 | ||
2601 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 2505 | return ret; |
2602 | bio_put(bio); | ||
2603 | return -EIO; | ||
2604 | } | ||
2605 | bio_put(bio); | ||
2606 | ee_len -= done; | ||
2607 | ee_pblock += done << (blkbits - 9); | ||
2608 | } | ||
2609 | return 0; | ||
2610 | } | 2506 | } |
2611 | 2507 | ||
2612 | #define EXT4_EXT_ZERO_LEN 7 | 2508 | #define EXT4_EXT_ZERO_LEN 7 |
@@ -2652,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2652 | ee_block = le32_to_cpu(ex->ee_block); | 2548 | ee_block = le32_to_cpu(ex->ee_block); |
2653 | ee_len = ext4_ext_get_actual_len(ex); | 2549 | ee_len = ext4_ext_get_actual_len(ex); |
2654 | allocated = ee_len - (map->m_lblk - ee_block); | 2550 | allocated = ee_len - (map->m_lblk - ee_block); |
2655 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | 2551 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); |
2656 | 2552 | ||
2657 | ex2 = ex; | 2553 | ex2 = ex; |
2658 | orig_ex.ee_block = ex->ee_block; | 2554 | orig_ex.ee_block = ex->ee_block; |
2659 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2555 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2660 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2556 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); |
2661 | 2557 | ||
2662 | /* | 2558 | /* |
2663 | * It is safe to convert extent to initialized via explicit | 2559 | * It is safe to convert extent to initialized via explicit |
@@ -2676,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2676 | /* update the extent length and mark as initialized */ | 2572 | /* update the extent length and mark as initialized */ |
2677 | ex->ee_block = orig_ex.ee_block; | 2573 | ex->ee_block = orig_ex.ee_block; |
2678 | ex->ee_len = orig_ex.ee_len; | 2574 | ex->ee_len = orig_ex.ee_len; |
2679 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2575 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2680 | ext4_ext_dirty(handle, inode, path + depth); | 2576 | ext4_ext_dirty(handle, inode, path + depth); |
2681 | /* zeroed the full extent */ | 2577 | /* zeroed the full extent */ |
2682 | return allocated; | 2578 | return allocated; |
@@ -2711,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2711 | ex->ee_block = orig_ex.ee_block; | 2607 | ex->ee_block = orig_ex.ee_block; |
2712 | ex->ee_len = cpu_to_le16(ee_len - allocated); | 2608 | ex->ee_len = cpu_to_le16(ee_len - allocated); |
2713 | ext4_ext_mark_uninitialized(ex); | 2609 | ext4_ext_mark_uninitialized(ex); |
2714 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2610 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2715 | ext4_ext_dirty(handle, inode, path + depth); | 2611 | ext4_ext_dirty(handle, inode, path + depth); |
2716 | 2612 | ||
2717 | ex3 = &newex; | 2613 | ex3 = &newex; |
@@ -2726,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2726 | goto fix_extent_len; | 2622 | goto fix_extent_len; |
2727 | ex->ee_block = orig_ex.ee_block; | 2623 | ex->ee_block = orig_ex.ee_block; |
2728 | ex->ee_len = orig_ex.ee_len; | 2624 | ex->ee_len = orig_ex.ee_len; |
2729 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2625 | ext4_ext_store_pblock(ex, |
2626 | ext4_ext_pblock(&orig_ex)); | ||
2730 | ext4_ext_dirty(handle, inode, path + depth); | 2627 | ext4_ext_dirty(handle, inode, path + depth); |
2731 | /* blocks available from map->m_lblk */ | 2628 | /* blocks available from map->m_lblk */ |
2732 | return allocated; | 2629 | return allocated; |
@@ -2783,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2783 | /* update the extent length and mark as initialized */ | 2680 | /* update the extent length and mark as initialized */ |
2784 | ex->ee_block = orig_ex.ee_block; | 2681 | ex->ee_block = orig_ex.ee_block; |
2785 | ex->ee_len = orig_ex.ee_len; | 2682 | ex->ee_len = orig_ex.ee_len; |
2786 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2683 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2787 | ext4_ext_dirty(handle, inode, path + depth); | 2684 | ext4_ext_dirty(handle, inode, path + depth); |
2788 | /* zeroed the full extent */ | 2685 | /* zeroed the full extent */ |
2789 | /* blocks available from map->m_lblk */ | 2686 | /* blocks available from map->m_lblk */ |
@@ -2834,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2834 | /* update the extent length and mark as initialized */ | 2731 | /* update the extent length and mark as initialized */ |
2835 | ex->ee_block = orig_ex.ee_block; | 2732 | ex->ee_block = orig_ex.ee_block; |
2836 | ex->ee_len = orig_ex.ee_len; | 2733 | ex->ee_len = orig_ex.ee_len; |
2837 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2734 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2838 | ext4_ext_dirty(handle, inode, path + depth); | 2735 | ext4_ext_dirty(handle, inode, path + depth); |
2839 | /* zero out the first half */ | 2736 | /* zero out the first half */ |
2840 | /* blocks available from map->m_lblk */ | 2737 | /* blocks available from map->m_lblk */ |
@@ -2903,7 +2800,7 @@ insert: | |||
2903 | /* update the extent length and mark as initialized */ | 2800 | /* update the extent length and mark as initialized */ |
2904 | ex->ee_block = orig_ex.ee_block; | 2801 | ex->ee_block = orig_ex.ee_block; |
2905 | ex->ee_len = orig_ex.ee_len; | 2802 | ex->ee_len = orig_ex.ee_len; |
2906 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2803 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2907 | ext4_ext_dirty(handle, inode, path + depth); | 2804 | ext4_ext_dirty(handle, inode, path + depth); |
2908 | /* zero out the first half */ | 2805 | /* zero out the first half */ |
2909 | return allocated; | 2806 | return allocated; |
@@ -2916,7 +2813,7 @@ out: | |||
2916 | fix_extent_len: | 2813 | fix_extent_len: |
2917 | ex->ee_block = orig_ex.ee_block; | 2814 | ex->ee_block = orig_ex.ee_block; |
2918 | ex->ee_len = orig_ex.ee_len; | 2815 | ex->ee_len = orig_ex.ee_len; |
2919 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2816 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2920 | ext4_ext_mark_uninitialized(ex); | 2817 | ext4_ext_mark_uninitialized(ex); |
2921 | ext4_ext_dirty(handle, inode, path + depth); | 2818 | ext4_ext_dirty(handle, inode, path + depth); |
2922 | return err; | 2819 | return err; |
@@ -2937,7 +2834,7 @@ fix_extent_len: | |||
2937 | * One of more index blocks maybe needed if the extent tree grow after | 2834 | * One of more index blocks maybe needed if the extent tree grow after |
2938 | * the unintialized extent split. To prevent ENOSPC occur at the IO | 2835 | * the unintialized extent split. To prevent ENOSPC occur at the IO |
2939 | * complete, we need to split the uninitialized extent before DIO submit | 2836 | * complete, we need to split the uninitialized extent before DIO submit |
2940 | * the IO. The uninitilized extent called at this time will be split | 2837 | * the IO. The uninitialized extent called at this time will be split |
2941 | * into three uninitialized extent(at most). After IO complete, the part | 2838 | * into three uninitialized extent(at most). After IO complete, the part |
2942 | * being filled will be convert to initialized by the end_io callback function | 2839 | * being filled will be convert to initialized by the end_io callback function |
2943 | * via ext4_convert_unwritten_extents(). | 2840 | * via ext4_convert_unwritten_extents(). |
@@ -2954,7 +2851,6 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2954 | struct ext4_extent *ex1 = NULL; | 2851 | struct ext4_extent *ex1 = NULL; |
2955 | struct ext4_extent *ex2 = NULL; | 2852 | struct ext4_extent *ex2 = NULL; |
2956 | struct ext4_extent *ex3 = NULL; | 2853 | struct ext4_extent *ex3 = NULL; |
2957 | struct ext4_extent_header *eh; | ||
2958 | ext4_lblk_t ee_block, eof_block; | 2854 | ext4_lblk_t ee_block, eof_block; |
2959 | unsigned int allocated, ee_len, depth; | 2855 | unsigned int allocated, ee_len, depth; |
2960 | ext4_fsblk_t newblock; | 2856 | ext4_fsblk_t newblock; |
@@ -2971,17 +2867,16 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2971 | eof_block = map->m_lblk + map->m_len; | 2867 | eof_block = map->m_lblk + map->m_len; |
2972 | 2868 | ||
2973 | depth = ext_depth(inode); | 2869 | depth = ext_depth(inode); |
2974 | eh = path[depth].p_hdr; | ||
2975 | ex = path[depth].p_ext; | 2870 | ex = path[depth].p_ext; |
2976 | ee_block = le32_to_cpu(ex->ee_block); | 2871 | ee_block = le32_to_cpu(ex->ee_block); |
2977 | ee_len = ext4_ext_get_actual_len(ex); | 2872 | ee_len = ext4_ext_get_actual_len(ex); |
2978 | allocated = ee_len - (map->m_lblk - ee_block); | 2873 | allocated = ee_len - (map->m_lblk - ee_block); |
2979 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | 2874 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); |
2980 | 2875 | ||
2981 | ex2 = ex; | 2876 | ex2 = ex; |
2982 | orig_ex.ee_block = ex->ee_block; | 2877 | orig_ex.ee_block = ex->ee_block; |
2983 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2878 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2984 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2879 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); |
2985 | 2880 | ||
2986 | /* | 2881 | /* |
2987 | * It is safe to convert extent to initialized via explicit | 2882 | * It is safe to convert extent to initialized via explicit |
@@ -3030,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3030 | /* update the extent length and mark as initialized */ | 2925 | /* update the extent length and mark as initialized */ |
3031 | ex->ee_block = orig_ex.ee_block; | 2926 | ex->ee_block = orig_ex.ee_block; |
3032 | ex->ee_len = orig_ex.ee_len; | 2927 | ex->ee_len = orig_ex.ee_len; |
3033 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2928 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3034 | ext4_ext_dirty(handle, inode, path + depth); | 2929 | ext4_ext_dirty(handle, inode, path + depth); |
3035 | /* zeroed the full extent */ | 2930 | /* zeroed the full extent */ |
3036 | /* blocks available from map->m_lblk */ | 2931 | /* blocks available from map->m_lblk */ |
@@ -3058,7 +2953,6 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3058 | err = PTR_ERR(path); | 2953 | err = PTR_ERR(path); |
3059 | goto out; | 2954 | goto out; |
3060 | } | 2955 | } |
3061 | eh = path[depth].p_hdr; | ||
3062 | ex = path[depth].p_ext; | 2956 | ex = path[depth].p_ext; |
3063 | if (ex2 != &newex) | 2957 | if (ex2 != &newex) |
3064 | ex2 = ex; | 2958 | ex2 = ex; |
@@ -3103,7 +2997,7 @@ insert: | |||
3103 | /* update the extent length and mark as initialized */ | 2997 | /* update the extent length and mark as initialized */ |
3104 | ex->ee_block = orig_ex.ee_block; | 2998 | ex->ee_block = orig_ex.ee_block; |
3105 | ex->ee_len = orig_ex.ee_len; | 2999 | ex->ee_len = orig_ex.ee_len; |
3106 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3000 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3107 | ext4_ext_dirty(handle, inode, path + depth); | 3001 | ext4_ext_dirty(handle, inode, path + depth); |
3108 | /* zero out the first half */ | 3002 | /* zero out the first half */ |
3109 | return allocated; | 3003 | return allocated; |
@@ -3116,7 +3010,7 @@ out: | |||
3116 | fix_extent_len: | 3010 | fix_extent_len: |
3117 | ex->ee_block = orig_ex.ee_block; | 3011 | ex->ee_block = orig_ex.ee_block; |
3118 | ex->ee_len = orig_ex.ee_len; | 3012 | ex->ee_len = orig_ex.ee_len; |
3119 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3013 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3120 | ext4_ext_mark_uninitialized(ex); | 3014 | ext4_ext_mark_uninitialized(ex); |
3121 | ext4_ext_dirty(handle, inode, path + depth); | 3015 | ext4_ext_dirty(handle, inode, path + depth); |
3122 | return err; | 3016 | return err; |
@@ -3184,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, | |||
3184 | unmap_underlying_metadata(bdev, block + i); | 3078 | unmap_underlying_metadata(bdev, block + i); |
3185 | } | 3079 | } |
3186 | 3080 | ||
3081 | /* | ||
3082 | * Handle EOFBLOCKS_FL flag, clearing it if necessary | ||
3083 | */ | ||
3084 | static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | ||
3085 | struct ext4_map_blocks *map, | ||
3086 | struct ext4_ext_path *path, | ||
3087 | unsigned int len) | ||
3088 | { | ||
3089 | int i, depth; | ||
3090 | struct ext4_extent_header *eh; | ||
3091 | struct ext4_extent *ex, *last_ex; | ||
3092 | |||
3093 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
3094 | return 0; | ||
3095 | |||
3096 | depth = ext_depth(inode); | ||
3097 | eh = path[depth].p_hdr; | ||
3098 | ex = path[depth].p_ext; | ||
3099 | |||
3100 | if (unlikely(!eh->eh_entries)) { | ||
3101 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " | ||
3102 | "EOFBLOCKS_FL set"); | ||
3103 | return -EIO; | ||
3104 | } | ||
3105 | last_ex = EXT_LAST_EXTENT(eh); | ||
3106 | /* | ||
3107 | * We should clear the EOFBLOCKS_FL flag if we are writing the | ||
3108 | * last block in the last extent in the file. We test this by | ||
3109 | * first checking to see if the caller to | ||
3110 | * ext4_ext_get_blocks() was interested in the last block (or | ||
3111 | * a block beyond the last block) in the current extent. If | ||
3112 | * this turns out to be false, we can bail out from this | ||
3113 | * function immediately. | ||
3114 | */ | ||
3115 | if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) + | ||
3116 | ext4_ext_get_actual_len(last_ex)) | ||
3117 | return 0; | ||
3118 | /* | ||
3119 | * If the caller does appear to be planning to write at or | ||
3120 | * beyond the end of the current extent, we then test to see | ||
3121 | * if the current extent is the last extent in the file, by | ||
3122 | * checking to make sure it was reached via the rightmost node | ||
3123 | * at each level of the tree. | ||
3124 | */ | ||
3125 | for (i = depth-1; i >= 0; i--) | ||
3126 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3127 | return 0; | ||
3128 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3129 | return ext4_mark_inode_dirty(handle, inode); | ||
3130 | } | ||
3131 | |||
3187 | static int | 3132 | static int |
3188 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3133 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3189 | struct ext4_map_blocks *map, | 3134 | struct ext4_map_blocks *map, |
@@ -3210,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3210 | * completed | 3155 | * completed |
3211 | */ | 3156 | */ |
3212 | if (io) | 3157 | if (io) |
3213 | io->flag = EXT4_IO_UNWRITTEN; | 3158 | io->flag = EXT4_IO_END_UNWRITTEN; |
3214 | else | 3159 | else |
3215 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3160 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3216 | if (ext4_should_dioread_nolock(inode)) | 3161 | if (ext4_should_dioread_nolock(inode)) |
@@ -3221,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3221 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { | 3166 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { |
3222 | ret = ext4_convert_unwritten_extents_endio(handle, inode, | 3167 | ret = ext4_convert_unwritten_extents_endio(handle, inode, |
3223 | path); | 3168 | path); |
3224 | if (ret >= 0) | 3169 | if (ret >= 0) { |
3225 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3170 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3171 | err = check_eofblocks_fl(handle, inode, map, path, | ||
3172 | map->m_len); | ||
3173 | } else | ||
3174 | err = ret; | ||
3226 | goto out2; | 3175 | goto out2; |
3227 | } | 3176 | } |
3228 | /* buffered IO case */ | 3177 | /* buffered IO case */ |
@@ -3248,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3248 | 3197 | ||
3249 | /* buffered write, writepage time, convert*/ | 3198 | /* buffered write, writepage time, convert*/ |
3250 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); | 3199 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
3251 | if (ret >= 0) | 3200 | if (ret >= 0) { |
3252 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3201 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3202 | err = check_eofblocks_fl(handle, inode, map, path, map->m_len); | ||
3203 | if (err < 0) | ||
3204 | goto out2; | ||
3205 | } | ||
3206 | |||
3253 | out: | 3207 | out: |
3254 | if (ret <= 0) { | 3208 | if (ret <= 0) { |
3255 | err = ret; | 3209 | err = ret; |
@@ -3296,6 +3250,7 @@ out2: | |||
3296 | } | 3250 | } |
3297 | return err ? err : allocated; | 3251 | return err ? err : allocated; |
3298 | } | 3252 | } |
3253 | |||
3299 | /* | 3254 | /* |
3300 | * Block allocation/map/preallocation routine for extents based files | 3255 | * Block allocation/map/preallocation routine for extents based files |
3301 | * | 3256 | * |
@@ -3319,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3319 | { | 3274 | { |
3320 | struct ext4_ext_path *path = NULL; | 3275 | struct ext4_ext_path *path = NULL; |
3321 | struct ext4_extent_header *eh; | 3276 | struct ext4_extent_header *eh; |
3322 | struct ext4_extent newex, *ex, *last_ex; | 3277 | struct ext4_extent newex, *ex; |
3323 | ext4_fsblk_t newblock; | 3278 | ext4_fsblk_t newblock; |
3324 | int i, err = 0, depth, ret, cache_type; | 3279 | int err = 0, depth, ret, cache_type; |
3325 | unsigned int allocated = 0; | 3280 | unsigned int allocated = 0; |
3326 | struct ext4_allocation_request ar; | 3281 | struct ext4_allocation_request ar; |
3327 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3282 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
@@ -3345,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3345 | /* block is already allocated */ | 3300 | /* block is already allocated */ |
3346 | newblock = map->m_lblk | 3301 | newblock = map->m_lblk |
3347 | - le32_to_cpu(newex.ee_block) | 3302 | - le32_to_cpu(newex.ee_block) |
3348 | + ext_pblock(&newex); | 3303 | + ext4_ext_pblock(&newex); |
3349 | /* number of remaining blocks in the extent */ | 3304 | /* number of remaining blocks in the extent */ |
3350 | allocated = ext4_ext_get_actual_len(&newex) - | 3305 | allocated = ext4_ext_get_actual_len(&newex) - |
3351 | (map->m_lblk - le32_to_cpu(newex.ee_block)); | 3306 | (map->m_lblk - le32_to_cpu(newex.ee_block)); |
@@ -3383,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3383 | ex = path[depth].p_ext; | 3338 | ex = path[depth].p_ext; |
3384 | if (ex) { | 3339 | if (ex) { |
3385 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 3340 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
3386 | ext4_fsblk_t ee_start = ext_pblock(ex); | 3341 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
3387 | unsigned short ee_len; | 3342 | unsigned short ee_len; |
3388 | 3343 | ||
3389 | /* | 3344 | /* |
@@ -3492,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3492 | */ | 3447 | */ |
3493 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3448 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3494 | if (io) | 3449 | if (io) |
3495 | io->flag = EXT4_IO_UNWRITTEN; | 3450 | io->flag = EXT4_IO_END_UNWRITTEN; |
3496 | else | 3451 | else |
3497 | ext4_set_inode_state(inode, | 3452 | ext4_set_inode_state(inode, |
3498 | EXT4_STATE_DIO_UNWRITTEN); | 3453 | EXT4_STATE_DIO_UNWRITTEN); |
@@ -3501,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3501 | map->m_flags |= EXT4_MAP_UNINIT; | 3456 | map->m_flags |= EXT4_MAP_UNINIT; |
3502 | } | 3457 | } |
3503 | 3458 | ||
3504 | if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { | 3459 | err = check_eofblocks_fl(handle, inode, map, path, ar.len); |
3505 | if (unlikely(!eh->eh_entries)) { | 3460 | if (err) |
3506 | EXT4_ERROR_INODE(inode, | 3461 | goto out2; |
3507 | "eh->eh_entries == 0 and " | 3462 | |
3508 | "EOFBLOCKS_FL set"); | ||
3509 | err = -EIO; | ||
3510 | goto out2; | ||
3511 | } | ||
3512 | last_ex = EXT_LAST_EXTENT(eh); | ||
3513 | /* | ||
3514 | * If the current leaf block was reached by looking at | ||
3515 | * the last index block all the way down the tree, and | ||
3516 | * we are extending the inode beyond the last extent | ||
3517 | * in the current leaf block, then clear the | ||
3518 | * EOFBLOCKS_FL flag. | ||
3519 | */ | ||
3520 | for (i = depth-1; i >= 0; i--) { | ||
3521 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3522 | break; | ||
3523 | } | ||
3524 | if ((i < 0) && | ||
3525 | (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + | ||
3526 | ext4_ext_get_actual_len(last_ex))) | ||
3527 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3528 | } | ||
3529 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3463 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3530 | if (err) { | 3464 | if (err) { |
3531 | /* free data blocks we just allocated */ | 3465 | /* free data blocks we just allocated */ |
3532 | /* not a good idea to call discard here directly, | 3466 | /* not a good idea to call discard here directly, |
3533 | * but otherwise we'd need to call it every free() */ | 3467 | * but otherwise we'd need to call it every free() */ |
3534 | ext4_discard_preallocations(inode); | 3468 | ext4_discard_preallocations(inode); |
3535 | ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), | 3469 | ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), |
3536 | ext4_ext_get_actual_len(&newex), 0); | 3470 | ext4_ext_get_actual_len(&newex), 0); |
3537 | goto out2; | 3471 | goto out2; |
3538 | } | 3472 | } |
3539 | 3473 | ||
3540 | /* previous routine could use block we allocated */ | 3474 | /* previous routine could use block we allocated */ |
3541 | newblock = ext_pblock(&newex); | 3475 | newblock = ext4_ext_pblock(&newex); |
3542 | allocated = ext4_ext_get_actual_len(&newex); | 3476 | allocated = ext4_ext_get_actual_len(&newex); |
3543 | if (allocated > map->m_len) | 3477 | if (allocated > map->m_len) |
3544 | allocated = map->m_len; | 3478 | allocated = map->m_len; |
@@ -3733,7 +3667,7 @@ retry: | |||
3733 | printk(KERN_ERR "%s: ext4_ext_map_blocks " | 3667 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3734 | "returned error inode#%lu, block=%u, " | 3668 | "returned error inode#%lu, block=%u, " |
3735 | "max_blocks=%u", __func__, | 3669 | "max_blocks=%u", __func__, |
3736 | inode->i_ino, block, max_blocks); | 3670 | inode->i_ino, map.m_lblk, max_blocks); |
3737 | #endif | 3671 | #endif |
3738 | ext4_mark_inode_dirty(handle, inode); | 3672 | ext4_mark_inode_dirty(handle, inode); |
3739 | ret2 = ext4_journal_stop(handle); | 3673 | ret2 = ext4_journal_stop(handle); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 5313ae4cda2d..5a5c55ddceef 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -70,7 +70,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
71 | size_t length = iov_length(iov, nr_segs); | 71 | size_t length = iov_length(iov, nr_segs); |
72 | 72 | ||
73 | if (pos > sbi->s_bitmap_maxbytes) | 73 | if ((pos > sbi->s_bitmap_maxbytes || |
74 | (pos == sbi->s_bitmap_maxbytes && length > 0))) | ||
74 | return -EFBIG; | 75 | return -EFBIG; |
75 | 76 | ||
76 | if (pos + length > sbi->s_bitmap_maxbytes) { | 77 | if (pos + length > sbi->s_bitmap_maxbytes) { |
@@ -123,14 +124,56 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
123 | if (!IS_ERR(cp)) { | 124 | if (!IS_ERR(cp)) { |
124 | memcpy(sbi->s_es->s_last_mounted, cp, | 125 | memcpy(sbi->s_es->s_last_mounted, cp, |
125 | sizeof(sbi->s_es->s_last_mounted)); | 126 | sizeof(sbi->s_es->s_last_mounted)); |
126 | sb->s_dirt = 1; | 127 | ext4_mark_super_dirty(sb); |
127 | } | 128 | } |
128 | } | 129 | } |
129 | return dquot_file_open(inode, filp); | 130 | return dquot_file_open(inode, filp); |
130 | } | 131 | } |
131 | 132 | ||
133 | /* | ||
134 | * ext4_llseek() copied from generic_file_llseek() to handle both | ||
135 | * block-mapped and extent-mapped maxbytes values. This should | ||
136 | * otherwise be identical with generic_file_llseek(). | ||
137 | */ | ||
138 | loff_t ext4_llseek(struct file *file, loff_t offset, int origin) | ||
139 | { | ||
140 | struct inode *inode = file->f_mapping->host; | ||
141 | loff_t maxbytes; | ||
142 | |||
143 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | ||
144 | maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; | ||
145 | else | ||
146 | maxbytes = inode->i_sb->s_maxbytes; | ||
147 | mutex_lock(&inode->i_mutex); | ||
148 | switch (origin) { | ||
149 | case SEEK_END: | ||
150 | offset += inode->i_size; | ||
151 | break; | ||
152 | case SEEK_CUR: | ||
153 | if (offset == 0) { | ||
154 | mutex_unlock(&inode->i_mutex); | ||
155 | return file->f_pos; | ||
156 | } | ||
157 | offset += file->f_pos; | ||
158 | break; | ||
159 | } | ||
160 | |||
161 | if (offset < 0 || offset > maxbytes) { | ||
162 | mutex_unlock(&inode->i_mutex); | ||
163 | return -EINVAL; | ||
164 | } | ||
165 | |||
166 | if (offset != file->f_pos) { | ||
167 | file->f_pos = offset; | ||
168 | file->f_version = 0; | ||
169 | } | ||
170 | mutex_unlock(&inode->i_mutex); | ||
171 | |||
172 | return offset; | ||
173 | } | ||
174 | |||
132 | const struct file_operations ext4_file_operations = { | 175 | const struct file_operations ext4_file_operations = { |
133 | .llseek = generic_file_llseek, | 176 | .llseek = ext4_llseek, |
134 | .read = do_sync_read, | 177 | .read = do_sync_read, |
135 | .write = do_sync_write, | 178 | .write = do_sync_write, |
136 | .aio_read = generic_file_aio_read, | 179 | .aio_read = generic_file_aio_read, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 592adf2e546e..c1a7bc923cf6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -34,6 +34,89 @@ | |||
34 | 34 | ||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | static void dump_completed_IO(struct inode * inode) | ||
38 | { | ||
39 | #ifdef EXT4_DEBUG | ||
40 | struct list_head *cur, *before, *after; | ||
41 | ext4_io_end_t *io, *io0, *io1; | ||
42 | unsigned long flags; | ||
43 | |||
44 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
45 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
50 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
51 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
52 | cur = &io->list; | ||
53 | before = cur->prev; | ||
54 | io0 = container_of(before, ext4_io_end_t, list); | ||
55 | after = cur->next; | ||
56 | io1 = container_of(after, ext4_io_end_t, list); | ||
57 | |||
58 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
59 | io, inode->i_ino, io0, io1); | ||
60 | } | ||
61 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This function is called from ext4_sync_file(). | ||
67 | * | ||
68 | * When IO is completed, the work to convert unwritten extents to | ||
69 | * written is queued on workqueue but may not get immediately | ||
70 | * scheduled. When fsync is called, we need to ensure the | ||
71 | * conversion is complete before fsync returns. | ||
72 | * The inode keeps track of a list of pending/completed IO that | ||
73 | * might needs to do the conversion. This function walks through | ||
74 | * the list and convert the related unwritten extents for completed IO | ||
75 | * to written. | ||
76 | * The function return the number of pending IOs on success. | ||
77 | */ | ||
78 | static int flush_completed_IO(struct inode *inode) | ||
79 | { | ||
80 | ext4_io_end_t *io; | ||
81 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
82 | unsigned long flags; | ||
83 | int ret = 0; | ||
84 | int ret2 = 0; | ||
85 | |||
86 | if (list_empty(&ei->i_completed_io_list)) | ||
87 | return ret; | ||
88 | |||
89 | dump_completed_IO(inode); | ||
90 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
91 | while (!list_empty(&ei->i_completed_io_list)){ | ||
92 | io = list_entry(ei->i_completed_io_list.next, | ||
93 | ext4_io_end_t, list); | ||
94 | /* | ||
95 | * Calling ext4_end_io_nolock() to convert completed | ||
96 | * IO to written. | ||
97 | * | ||
98 | * When ext4_sync_file() is called, run_queue() may already | ||
99 | * about to flush the work corresponding to this io structure. | ||
100 | * It will be upset if it founds the io structure related | ||
101 | * to the work-to-be schedule is freed. | ||
102 | * | ||
103 | * Thus we need to keep the io structure still valid here after | ||
104 | * convertion finished. The io structure has a flag to | ||
105 | * avoid double converting from both fsync and background work | ||
106 | * queue work. | ||
107 | */ | ||
108 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
109 | ret = ext4_end_io_nolock(io); | ||
110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
111 | if (ret < 0) | ||
112 | ret2 = ret; | ||
113 | else | ||
114 | list_del_init(&io->list); | ||
115 | } | ||
116 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
117 | return (ret2 < 0) ? ret2 : 0; | ||
118 | } | ||
119 | |||
37 | /* | 120 | /* |
38 | * If we're not journaling and this is a just-created file, we have to | 121 | * If we're not journaling and this is a just-created file, we have to |
39 | * sync our parent directory (if it was freshly created) since | 122 | * sync our parent directory (if it was freshly created) since |
@@ -128,10 +211,9 @@ int ext4_sync_file(struct file *file, int datasync) | |||
128 | (journal->j_fs_dev != journal->j_dev) && | 211 | (journal->j_fs_dev != journal->j_dev) && |
129 | (journal->j_flags & JBD2_BARRIER)) | 212 | (journal->j_flags & JBD2_BARRIER)) |
130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | 213 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
131 | NULL, BLKDEV_IFL_WAIT); | 214 | NULL); |
132 | ret = jbd2_log_wait_commit(journal, commit_tid); | 215 | ret = jbd2_log_wait_commit(journal, commit_tid); |
133 | } else if (journal->j_flags & JBD2_BARRIER) | 216 | } else if (journal->j_flags & JBD2_BARRIER) |
134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 217 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
135 | BLKDEV_IFL_WAIT); | ||
136 | return ret; | 218 | return ret; |
137 | } | 219 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 25c4b3173fd9..1ce240a23ebb 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -50,7 +50,7 @@ | |||
50 | * need to use it within a single byte (to ensure we get endianness right). | 50 | * need to use it within a single byte (to ensure we get endianness right). |
51 | * We can use memset for the rest of the bitmap as there are no other users. | 51 | * We can use memset for the rest of the bitmap as there are no other users. |
52 | */ | 52 | */ |
53 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | 53 | void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) |
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | 56 | ||
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
65 | } | 65 | } |
66 | 66 | ||
67 | /* Initializes an uninitialized inode bitmap */ | 67 | /* Initializes an uninitialized inode bitmap */ |
68 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | 68 | static unsigned ext4_init_inode_bitmap(struct super_block *sb, |
69 | ext4_group_t block_group, | 69 | struct buffer_head *bh, |
70 | struct ext4_group_desc *gdp) | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | ||
71 | { | 72 | { |
72 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 73 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
73 | 74 | ||
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
85 | } | 86 | } |
86 | 87 | ||
87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
89 | bh->b_data); | 90 | bh->b_data); |
90 | 91 | ||
91 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
107 | desc = ext4_get_group_desc(sb, block_group, NULL); | 108 | desc = ext4_get_group_desc(sb, block_group, NULL); |
108 | if (!desc) | 109 | if (!desc) |
109 | return NULL; | 110 | return NULL; |
111 | |||
110 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 112 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
111 | bh = sb_getblk(sb, bitmap_blk); | 113 | bh = sb_getblk(sb, bitmap_blk); |
112 | if (unlikely(!bh)) { | 114 | if (unlikely(!bh)) { |
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 125 | unlock_buffer(bh); |
124 | return bh; | 126 | return bh; |
125 | } | 127 | } |
128 | |||
126 | ext4_lock_group(sb, block_group); | 129 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 130 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 131 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
133 | return bh; | 136 | return bh; |
134 | } | 137 | } |
135 | ext4_unlock_group(sb, block_group); | 138 | ext4_unlock_group(sb, block_group); |
139 | |||
136 | if (buffer_uptodate(bh)) { | 140 | if (buffer_uptodate(bh)) { |
137 | /* | 141 | /* |
138 | * if not uninit if bh is uptodate, | 142 | * if not uninit if bh is uptodate, |
@@ -222,7 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
222 | is_directory = S_ISDIR(inode->i_mode); | 226 | is_directory = S_ISDIR(inode->i_mode); |
223 | 227 | ||
224 | /* Do this BEFORE marking the inode not in use or returning an error */ | 228 | /* Do this BEFORE marking the inode not in use or returning an error */ |
225 | clear_inode(inode); | 229 | ext4_clear_inode(inode); |
226 | 230 | ||
227 | es = EXT4_SB(sb)->s_es; | 231 | es = EXT4_SB(sb)->s_es; |
228 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 232 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
@@ -279,7 +283,7 @@ out: | |||
279 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 283 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
280 | if (!fatal) | 284 | if (!fatal) |
281 | fatal = err; | 285 | fatal = err; |
282 | sb->s_dirt = 1; | 286 | ext4_mark_super_dirty(sb); |
283 | } else | 287 | } else |
284 | ext4_error(sb, "bit already cleared for inode %lu", ino); | 288 | ext4_error(sb, "bit already cleared for inode %lu", ino); |
285 | 289 | ||
@@ -411,8 +415,8 @@ struct orlov_stats { | |||
411 | * for a particular block group or flex_bg. If flex_size is 1, then g | 415 | * for a particular block group or flex_bg. If flex_size is 1, then g |
412 | * is a block group number; otherwise it is flex_bg number. | 416 | * is a block group number; otherwise it is flex_bg number. |
413 | */ | 417 | */ |
414 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | 418 | static void get_orlov_stats(struct super_block *sb, ext4_group_t g, |
415 | int flex_size, struct orlov_stats *stats) | 419 | int flex_size, struct orlov_stats *stats) |
416 | { | 420 | { |
417 | struct ext4_group_desc *desc; | 421 | struct ext4_group_desc *desc; |
418 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; | 422 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; |
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb, | |||
712 | { | 716 | { |
713 | int free = 0, retval = 0, count; | 717 | int free = 0, retval = 0, count; |
714 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 718 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
719 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
715 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 720 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
716 | 721 | ||
722 | /* | ||
723 | * We have to be sure that new inode allocation does not race with | ||
724 | * inode table initialization, because otherwise we may end up | ||
725 | * allocating and writing new inode right before sb_issue_zeroout | ||
726 | * takes place and overwriting our new inode with zeroes. So we | ||
727 | * take alloc_sem to prevent it. | ||
728 | */ | ||
729 | down_read(&grp->alloc_sem); | ||
717 | ext4_lock_group(sb, group); | 730 | ext4_lock_group(sb, group); |
718 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 731 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
719 | /* not a free inode */ | 732 | /* not a free inode */ |
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
724 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 737 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
725 | ino > EXT4_INODES_PER_GROUP(sb)) { | 738 | ino > EXT4_INODES_PER_GROUP(sb)) { |
726 | ext4_unlock_group(sb, group); | 739 | ext4_unlock_group(sb, group); |
740 | up_read(&grp->alloc_sem); | ||
727 | ext4_error(sb, "reserved inode or inode > inodes count - " | 741 | ext4_error(sb, "reserved inode or inode > inodes count - " |
728 | "block_group = %u, inode=%lu", group, | 742 | "block_group = %u, inode=%lu", group, |
729 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 743 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
772 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
773 | err_ret: | 787 | err_ret: |
774 | ext4_unlock_group(sb, group); | 788 | ext4_unlock_group(sb, group); |
789 | up_read(&grp->alloc_sem); | ||
775 | return retval; | 790 | return retval; |
776 | } | 791 | } |
777 | 792 | ||
@@ -965,7 +980,7 @@ got: | |||
965 | percpu_counter_dec(&sbi->s_freeinodes_counter); | 980 | percpu_counter_dec(&sbi->s_freeinodes_counter); |
966 | if (S_ISDIR(mode)) | 981 | if (S_ISDIR(mode)) |
967 | percpu_counter_inc(&sbi->s_dirs_counter); | 982 | percpu_counter_inc(&sbi->s_dirs_counter); |
968 | sb->s_dirt = 1; | 983 | ext4_mark_super_dirty(sb); |
969 | 984 | ||
970 | if (sbi->s_log_groups_per_flex) { | 985 | if (sbi->s_log_groups_per_flex) { |
971 | flex_group = ext4_flex_group(sbi, group); | 986 | flex_group = ext4_flex_group(sbi, group); |
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1205 | } | 1220 | } |
1206 | return count; | 1221 | return count; |
1207 | } | 1222 | } |
1223 | |||
1224 | /* | ||
1225 | * Zeroes not yet zeroed inode table - just write zeroes through the whole | ||
1226 | * inode table. Must be called without any spinlock held. The only place | ||
1227 | * where it is called from on active part of filesystem is ext4lazyinit | ||
1228 | * thread, so we do not need any special locks, however we have to prevent | ||
1229 | * inode allocation from the current group, so we take alloc_sem lock, to | ||
1230 | * block ext4_claim_inode until we are finished. | ||
1231 | */ | ||
1232 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | ||
1233 | int barrier) | ||
1234 | { | ||
1235 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
1236 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1237 | struct ext4_group_desc *gdp = NULL; | ||
1238 | struct buffer_head *group_desc_bh; | ||
1239 | handle_t *handle; | ||
1240 | ext4_fsblk_t blk; | ||
1241 | int num, ret = 0, used_blks = 0; | ||
1242 | |||
1243 | /* This should not happen, but just to be sure check this */ | ||
1244 | if (sb->s_flags & MS_RDONLY) { | ||
1245 | ret = 1; | ||
1246 | goto out; | ||
1247 | } | ||
1248 | |||
1249 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | ||
1250 | if (!gdp) | ||
1251 | goto out; | ||
1252 | |||
1253 | /* | ||
1254 | * We do not need to lock this, because we are the only one | ||
1255 | * handling this flag. | ||
1256 | */ | ||
1257 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) | ||
1258 | goto out; | ||
1259 | |||
1260 | handle = ext4_journal_start_sb(sb, 1); | ||
1261 | if (IS_ERR(handle)) { | ||
1262 | ret = PTR_ERR(handle); | ||
1263 | goto out; | ||
1264 | } | ||
1265 | |||
1266 | down_write(&grp->alloc_sem); | ||
1267 | /* | ||
1268 | * If inode bitmap was already initialized there may be some | ||
1269 | * used inodes so we need to skip blocks with used inodes in | ||
1270 | * inode table. | ||
1271 | */ | ||
1272 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
1273 | used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - | ||
1274 | ext4_itable_unused_count(sb, gdp)), | ||
1275 | sbi->s_inodes_per_block); | ||
1276 | |||
1277 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | ||
1278 | ext4_error(sb, "Something is wrong with group %u\n" | ||
1279 | "Used itable blocks: %d" | ||
1280 | "itable unused count: %u\n", | ||
1281 | group, used_blks, | ||
1282 | ext4_itable_unused_count(sb, gdp)); | ||
1283 | ret = 1; | ||
1284 | goto out; | ||
1285 | } | ||
1286 | |||
1287 | blk = ext4_inode_table(sb, gdp) + used_blks; | ||
1288 | num = sbi->s_itb_per_group - used_blks; | ||
1289 | |||
1290 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
1291 | ret = ext4_journal_get_write_access(handle, | ||
1292 | group_desc_bh); | ||
1293 | if (ret) | ||
1294 | goto err_out; | ||
1295 | |||
1296 | /* | ||
1297 | * Skip zeroout if the inode table is full. But we set the ZEROED | ||
1298 | * flag anyway, because obviously, when it is full it does not need | ||
1299 | * further zeroing. | ||
1300 | */ | ||
1301 | if (unlikely(num == 0)) | ||
1302 | goto skip_zeroout; | ||
1303 | |||
1304 | ext4_debug("going to zero out inode table in group %d\n", | ||
1305 | group); | ||
1306 | ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); | ||
1307 | if (ret < 0) | ||
1308 | goto err_out; | ||
1309 | if (barrier) | ||
1310 | blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); | ||
1311 | |||
1312 | skip_zeroout: | ||
1313 | ext4_lock_group(sb, group); | ||
1314 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
1315 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1316 | ext4_unlock_group(sb, group); | ||
1317 | |||
1318 | BUFFER_TRACE(group_desc_bh, | ||
1319 | "call ext4_handle_dirty_metadata"); | ||
1320 | ret = ext4_handle_dirty_metadata(handle, NULL, | ||
1321 | group_desc_bh); | ||
1322 | |||
1323 | err_out: | ||
1324 | up_write(&grp->alloc_sem); | ||
1325 | ext4_journal_stop(handle); | ||
1326 | out: | ||
1327 | return ret; | ||
1328 | } | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 42272d67955a..2d6c6c8c036d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
60 | } | 60 | } |
61 | 61 | ||
62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
63 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
64 | struct buffer_head *bh_result, int create); | ||
65 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
66 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
67 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | ||
68 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | ||
63 | 69 | ||
64 | /* | 70 | /* |
65 | * Test whether an inode is a fast symlink. | 71 | * Test whether an inode is a fast symlink. |
@@ -167,11 +173,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
167 | /* | 173 | /* |
168 | * Called at the last iput() if i_nlink is zero. | 174 | * Called at the last iput() if i_nlink is zero. |
169 | */ | 175 | */ |
170 | void ext4_delete_inode(struct inode *inode) | 176 | void ext4_evict_inode(struct inode *inode) |
171 | { | 177 | { |
172 | handle_t *handle; | 178 | handle_t *handle; |
173 | int err; | 179 | int err; |
174 | 180 | ||
181 | if (inode->i_nlink) { | ||
182 | truncate_inode_pages(&inode->i_data, 0); | ||
183 | goto no_delete; | ||
184 | } | ||
185 | |||
175 | if (!is_bad_inode(inode)) | 186 | if (!is_bad_inode(inode)) |
176 | dquot_initialize(inode); | 187 | dquot_initialize(inode); |
177 | 188 | ||
@@ -221,6 +232,7 @@ void ext4_delete_inode(struct inode *inode) | |||
221 | "couldn't extend journal (err %d)", err); | 232 | "couldn't extend journal (err %d)", err); |
222 | stop_handle: | 233 | stop_handle: |
223 | ext4_journal_stop(handle); | 234 | ext4_journal_stop(handle); |
235 | ext4_orphan_del(NULL, inode); | ||
224 | goto no_delete; | 236 | goto no_delete; |
225 | } | 237 | } |
226 | } | 238 | } |
@@ -245,13 +257,13 @@ void ext4_delete_inode(struct inode *inode) | |||
245 | */ | 257 | */ |
246 | if (ext4_mark_inode_dirty(handle, inode)) | 258 | if (ext4_mark_inode_dirty(handle, inode)) |
247 | /* If that failed, just do the required in-core inode clear. */ | 259 | /* If that failed, just do the required in-core inode clear. */ |
248 | clear_inode(inode); | 260 | ext4_clear_inode(inode); |
249 | else | 261 | else |
250 | ext4_free_inode(handle, inode); | 262 | ext4_free_inode(handle, inode); |
251 | ext4_journal_stop(handle); | 263 | ext4_journal_stop(handle); |
252 | return; | 264 | return; |
253 | no_delete: | 265 | no_delete: |
254 | clear_inode(inode); /* We must guarantee clearing of inode... */ | 266 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
255 | } | 267 | } |
256 | 268 | ||
257 | typedef struct { | 269 | typedef struct { |
@@ -337,9 +349,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
337 | return n; | 349 | return n; |
338 | } | 350 | } |
339 | 351 | ||
340 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 352 | static int __ext4_check_blockref(const char *function, unsigned int line, |
353 | struct inode *inode, | ||
341 | __le32 *p, unsigned int max) | 354 | __le32 *p, unsigned int max) |
342 | { | 355 | { |
356 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
343 | __le32 *bref = p; | 357 | __le32 *bref = p; |
344 | unsigned int blk; | 358 | unsigned int blk; |
345 | 359 | ||
@@ -348,8 +362,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
348 | if (blk && | 362 | if (blk && |
349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 363 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
350 | blk, 1))) { | 364 | blk, 1))) { |
351 | ext4_error_inode(function, inode, | 365 | es->s_last_error_block = cpu_to_le64(blk); |
352 | "invalid block reference %u", blk); | 366 | ext4_error_inode(inode, function, line, blk, |
367 | "invalid block"); | ||
353 | return -EIO; | 368 | return -EIO; |
354 | } | 369 | } |
355 | } | 370 | } |
@@ -358,11 +373,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
358 | 373 | ||
359 | 374 | ||
360 | #define ext4_check_indirect_blockref(inode, bh) \ | 375 | #define ext4_check_indirect_blockref(inode, bh) \ |
361 | __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ | 376 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
377 | (__le32 *)(bh)->b_data, \ | ||
362 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | 378 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) |
363 | 379 | ||
364 | #define ext4_check_inode_blockref(inode) \ | 380 | #define ext4_check_inode_blockref(inode) \ |
365 | __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ | 381 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
382 | EXT4_I(inode)->i_data, \ | ||
366 | EXT4_NDIR_BLOCKS) | 383 | EXT4_NDIR_BLOCKS) |
367 | 384 | ||
368 | /** | 385 | /** |
@@ -744,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
744 | * parent to disk. | 761 | * parent to disk. |
745 | */ | 762 | */ |
746 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 763 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
764 | if (unlikely(!bh)) { | ||
765 | err = -EIO; | ||
766 | goto failed; | ||
767 | } | ||
768 | |||
747 | branch[n].bh = bh; | 769 | branch[n].bh = bh; |
748 | lock_buffer(bh); | 770 | lock_buffer(bh); |
749 | BUFFER_TRACE(bh, "call get_create_access"); | 771 | BUFFER_TRACE(bh, "call get_create_access"); |
@@ -1128,20 +1150,24 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1128 | ext4_discard_preallocations(inode); | 1150 | ext4_discard_preallocations(inode); |
1129 | } | 1151 | } |
1130 | 1152 | ||
1131 | static int check_block_validity(struct inode *inode, const char *func, | 1153 | static int __check_block_validity(struct inode *inode, const char *func, |
1154 | unsigned int line, | ||
1132 | struct ext4_map_blocks *map) | 1155 | struct ext4_map_blocks *map) |
1133 | { | 1156 | { |
1134 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, | 1157 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
1135 | map->m_len)) { | 1158 | map->m_len)) { |
1136 | ext4_error_inode(func, inode, | 1159 | ext4_error_inode(inode, func, line, map->m_pblk, |
1137 | "lblock %lu mapped to illegal pblock %llu " | 1160 | "lblock %lu mapped to illegal pblock " |
1138 | "(length %d)", (unsigned long) map->m_lblk, | 1161 | "(length %d)", (unsigned long) map->m_lblk, |
1139 | map->m_pblk, map->m_len); | 1162 | map->m_len); |
1140 | return -EIO; | 1163 | return -EIO; |
1141 | } | 1164 | } |
1142 | return 0; | 1165 | return 0; |
1143 | } | 1166 | } |
1144 | 1167 | ||
1168 | #define check_block_validity(inode, map) \ | ||
1169 | __check_block_validity((inode), __func__, __LINE__, (map)) | ||
1170 | |||
1145 | /* | 1171 | /* |
1146 | * Return the number of contiguous dirty pages in a given inode | 1172 | * Return the number of contiguous dirty pages in a given inode |
1147 | * starting at page frame idx. | 1173 | * starting at page frame idx. |
@@ -1192,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1192 | break; | 1218 | break; |
1193 | idx++; | 1219 | idx++; |
1194 | num++; | 1220 | num++; |
1195 | if (num >= max_pages) | 1221 | if (num >= max_pages) { |
1222 | done = 1; | ||
1196 | break; | 1223 | break; |
1224 | } | ||
1197 | } | 1225 | } |
1198 | pagevec_release(&pvec); | 1226 | pagevec_release(&pvec); |
1199 | } | 1227 | } |
@@ -1244,7 +1272,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1244 | up_read((&EXT4_I(inode)->i_data_sem)); | 1272 | up_read((&EXT4_I(inode)->i_data_sem)); |
1245 | 1273 | ||
1246 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1274 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1247 | int ret = check_block_validity(inode, __func__, map); | 1275 | int ret = check_block_validity(inode, map); |
1248 | if (ret != 0) | 1276 | if (ret != 0) |
1249 | return ret; | 1277 | return ret; |
1250 | } | 1278 | } |
@@ -1324,9 +1352,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1324 | 1352 | ||
1325 | up_write((&EXT4_I(inode)->i_data_sem)); | 1353 | up_write((&EXT4_I(inode)->i_data_sem)); |
1326 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1354 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1327 | int ret = check_block_validity(inode, | 1355 | int ret = check_block_validity(inode, map); |
1328 | "ext4_map_blocks_after_alloc", | ||
1329 | map); | ||
1330 | if (ret != 0) | 1356 | if (ret != 0) |
1331 | return ret; | 1357 | return ret; |
1332 | } | 1358 | } |
@@ -1519,9 +1545,25 @@ static int walk_page_buffers(handle_t *handle, | |||
1519 | static int do_journal_get_write_access(handle_t *handle, | 1545 | static int do_journal_get_write_access(handle_t *handle, |
1520 | struct buffer_head *bh) | 1546 | struct buffer_head *bh) |
1521 | { | 1547 | { |
1548 | int dirty = buffer_dirty(bh); | ||
1549 | int ret; | ||
1550 | |||
1522 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1551 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1523 | return 0; | 1552 | return 0; |
1524 | return ext4_journal_get_write_access(handle, bh); | 1553 | /* |
1554 | * __block_write_begin() could have dirtied some buffers. Clean | ||
1555 | * the dirty bit as jbd2_journal_get_write_access() could complain | ||
1556 | * otherwise about fs integrity issues. Setting of the dirty bit | ||
1557 | * by __block_write_begin() isn't a real problem here as we clear | ||
1558 | * the bit before releasing a page lock and thus writeback cannot | ||
1559 | * ever write the buffer. | ||
1560 | */ | ||
1561 | if (dirty) | ||
1562 | clear_buffer_dirty(bh); | ||
1563 | ret = ext4_journal_get_write_access(handle, bh); | ||
1564 | if (!ret && dirty) | ||
1565 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
1566 | return ret; | ||
1525 | } | 1567 | } |
1526 | 1568 | ||
1527 | /* | 1569 | /* |
@@ -1578,11 +1620,9 @@ retry: | |||
1578 | *pagep = page; | 1620 | *pagep = page; |
1579 | 1621 | ||
1580 | if (ext4_should_dioread_nolock(inode)) | 1622 | if (ext4_should_dioread_nolock(inode)) |
1581 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1623 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); |
1582 | fsdata, ext4_get_block_write); | ||
1583 | else | 1624 | else |
1584 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1625 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
1585 | fsdata, ext4_get_block); | ||
1586 | 1626 | ||
1587 | if (!ret && ext4_should_journal_data(inode)) { | 1627 | if (!ret && ext4_should_journal_data(inode)) { |
1588 | ret = walk_page_buffers(handle, page_buffers(page), | 1628 | ret = walk_page_buffers(handle, page_buffers(page), |
@@ -1593,7 +1633,7 @@ retry: | |||
1593 | unlock_page(page); | 1633 | unlock_page(page); |
1594 | page_cache_release(page); | 1634 | page_cache_release(page); |
1595 | /* | 1635 | /* |
1596 | * block_write_begin may have instantiated a few blocks | 1636 | * __block_write_begin may have instantiated a few blocks |
1597 | * outside i_size. Trim these off again. Don't need | 1637 | * outside i_size. Trim these off again. Don't need |
1598 | * i_size_read because we hold i_mutex. | 1638 | * i_size_read because we hold i_mutex. |
1599 | * | 1639 | * |
@@ -1968,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1968 | * | 2008 | * |
1969 | * As pages are already locked by write_cache_pages(), we can't use it | 2009 | * As pages are already locked by write_cache_pages(), we can't use it |
1970 | */ | 2010 | */ |
1971 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 2011 | static int mpage_da_submit_io(struct mpage_da_data *mpd, |
2012 | struct ext4_map_blocks *map) | ||
1972 | { | 2013 | { |
1973 | long pages_skipped; | ||
1974 | struct pagevec pvec; | 2014 | struct pagevec pvec; |
1975 | unsigned long index, end; | 2015 | unsigned long index, end; |
1976 | int ret = 0, err, nr_pages, i; | 2016 | int ret = 0, err, nr_pages, i; |
1977 | struct inode *inode = mpd->inode; | 2017 | struct inode *inode = mpd->inode; |
1978 | struct address_space *mapping = inode->i_mapping; | 2018 | struct address_space *mapping = inode->i_mapping; |
2019 | loff_t size = i_size_read(inode); | ||
2020 | unsigned int len, block_start; | ||
2021 | struct buffer_head *bh, *page_bufs = NULL; | ||
2022 | int journal_data = ext4_should_journal_data(inode); | ||
2023 | sector_t pblock = 0, cur_logical = 0; | ||
2024 | struct ext4_io_submit io_submit; | ||
1979 | 2025 | ||
1980 | BUG_ON(mpd->next_page <= mpd->first_page); | 2026 | BUG_ON(mpd->next_page <= mpd->first_page); |
2027 | memset(&io_submit, 0, sizeof(io_submit)); | ||
1981 | /* | 2028 | /* |
1982 | * We need to start from the first_page to the next_page - 1 | 2029 | * We need to start from the first_page to the next_page - 1 |
1983 | * to make sure we also write the mapped dirty buffer_heads. | 2030 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -1993,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1993 | if (nr_pages == 0) | 2040 | if (nr_pages == 0) |
1994 | break; | 2041 | break; |
1995 | for (i = 0; i < nr_pages; i++) { | 2042 | for (i = 0; i < nr_pages; i++) { |
2043 | int commit_write = 0, redirty_page = 0; | ||
1996 | struct page *page = pvec.pages[i]; | 2044 | struct page *page = pvec.pages[i]; |
1997 | 2045 | ||
1998 | index = page->index; | 2046 | index = page->index; |
1999 | if (index > end) | 2047 | if (index > end) |
2000 | break; | 2048 | break; |
2049 | |||
2050 | if (index == size >> PAGE_CACHE_SHIFT) | ||
2051 | len = size & ~PAGE_CACHE_MASK; | ||
2052 | else | ||
2053 | len = PAGE_CACHE_SIZE; | ||
2054 | if (map) { | ||
2055 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
2056 | inode->i_blkbits); | ||
2057 | pblock = map->m_pblk + (cur_logical - | ||
2058 | map->m_lblk); | ||
2059 | } | ||
2001 | index++; | 2060 | index++; |
2002 | 2061 | ||
2003 | BUG_ON(!PageLocked(page)); | 2062 | BUG_ON(!PageLocked(page)); |
2004 | BUG_ON(PageWriteback(page)); | 2063 | BUG_ON(PageWriteback(page)); |
2005 | 2064 | ||
2006 | pages_skipped = mpd->wbc->pages_skipped; | ||
2007 | err = mapping->a_ops->writepage(page, mpd->wbc); | ||
2008 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) | ||
2009 | /* | ||
2010 | * have successfully written the page | ||
2011 | * without skipping the same | ||
2012 | */ | ||
2013 | mpd->pages_written++; | ||
2014 | /* | 2065 | /* |
2015 | * In error case, we have to continue because | 2066 | * If the page does not have buffers (for |
2016 | * remaining pages are still locked | 2067 | * whatever reason), try to create them using |
2017 | * XXX: unlock and re-dirty them? | 2068 | * __block_write_begin. If this fails, |
2069 | * redirty the page and move on. | ||
2018 | */ | 2070 | */ |
2019 | if (ret == 0) | 2071 | if (!page_has_buffers(page)) { |
2020 | ret = err; | 2072 | if (__block_write_begin(page, 0, len, |
2021 | } | 2073 | noalloc_get_block_write)) { |
2022 | pagevec_release(&pvec); | 2074 | redirty_page: |
2023 | } | 2075 | redirty_page_for_writepage(mpd->wbc, |
2024 | return ret; | 2076 | page); |
2025 | } | 2077 | unlock_page(page); |
2026 | 2078 | continue; | |
2027 | /* | 2079 | } |
2028 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | 2080 | commit_write = 1; |
2029 | * | 2081 | } |
2030 | * the function goes through all passed space and put actual disk | ||
2031 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | ||
2032 | */ | ||
2033 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, | ||
2034 | struct ext4_map_blocks *map) | ||
2035 | { | ||
2036 | struct inode *inode = mpd->inode; | ||
2037 | struct address_space *mapping = inode->i_mapping; | ||
2038 | int blocks = map->m_len; | ||
2039 | sector_t pblock = map->m_pblk, cur_logical; | ||
2040 | struct buffer_head *head, *bh; | ||
2041 | pgoff_t index, end; | ||
2042 | struct pagevec pvec; | ||
2043 | int nr_pages, i; | ||
2044 | |||
2045 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2046 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2047 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2048 | |||
2049 | pagevec_init(&pvec, 0); | ||
2050 | |||
2051 | while (index <= end) { | ||
2052 | /* XXX: optimize tail */ | ||
2053 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
2054 | if (nr_pages == 0) | ||
2055 | break; | ||
2056 | for (i = 0; i < nr_pages; i++) { | ||
2057 | struct page *page = pvec.pages[i]; | ||
2058 | |||
2059 | index = page->index; | ||
2060 | if (index > end) | ||
2061 | break; | ||
2062 | index++; | ||
2063 | |||
2064 | BUG_ON(!PageLocked(page)); | ||
2065 | BUG_ON(PageWriteback(page)); | ||
2066 | BUG_ON(!page_has_buffers(page)); | ||
2067 | |||
2068 | bh = page_buffers(page); | ||
2069 | head = bh; | ||
2070 | |||
2071 | /* skip blocks out of the range */ | ||
2072 | do { | ||
2073 | if (cur_logical >= map->m_lblk) | ||
2074 | break; | ||
2075 | cur_logical++; | ||
2076 | } while ((bh = bh->b_this_page) != head); | ||
2077 | 2082 | ||
2083 | bh = page_bufs = page_buffers(page); | ||
2084 | block_start = 0; | ||
2078 | do { | 2085 | do { |
2079 | if (cur_logical >= map->m_lblk + blocks) | 2086 | if (!bh) |
2080 | break; | 2087 | goto redirty_page; |
2081 | 2088 | if (map && (cur_logical >= map->m_lblk) && | |
2082 | if (buffer_delay(bh) || buffer_unwritten(bh)) { | 2089 | (cur_logical <= (map->m_lblk + |
2083 | 2090 | (map->m_len - 1)))) { | |
2084 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | ||
2085 | |||
2086 | if (buffer_delay(bh)) { | 2091 | if (buffer_delay(bh)) { |
2087 | clear_buffer_delay(bh); | 2092 | clear_buffer_delay(bh); |
2088 | bh->b_blocknr = pblock; | 2093 | bh->b_blocknr = pblock; |
2089 | } else { | ||
2090 | /* | ||
2091 | * unwritten already should have | ||
2092 | * blocknr assigned. Verify that | ||
2093 | */ | ||
2094 | clear_buffer_unwritten(bh); | ||
2095 | BUG_ON(bh->b_blocknr != pblock); | ||
2096 | } | 2094 | } |
2095 | if (buffer_unwritten(bh) || | ||
2096 | buffer_mapped(bh)) | ||
2097 | BUG_ON(bh->b_blocknr != pblock); | ||
2098 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
2099 | set_buffer_uninit(bh); | ||
2100 | clear_buffer_unwritten(bh); | ||
2101 | } | ||
2097 | 2102 | ||
2098 | } else if (buffer_mapped(bh)) | 2103 | /* redirty page if block allocation undone */ |
2099 | BUG_ON(bh->b_blocknr != pblock); | 2104 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2100 | 2105 | redirty_page = 1; | |
2101 | if (map->m_flags & EXT4_MAP_UNINIT) | 2106 | bh = bh->b_this_page; |
2102 | set_buffer_uninit(bh); | 2107 | block_start += bh->b_size; |
2103 | cur_logical++; | 2108 | cur_logical++; |
2104 | pblock++; | 2109 | pblock++; |
2105 | } while ((bh = bh->b_this_page) != head); | 2110 | } while (bh != page_bufs); |
2111 | |||
2112 | if (redirty_page) | ||
2113 | goto redirty_page; | ||
2114 | |||
2115 | if (commit_write) | ||
2116 | /* mark the buffer_heads as dirty & uptodate */ | ||
2117 | block_commit_write(page, 0, len); | ||
2118 | |||
2119 | /* | ||
2120 | * Delalloc doesn't support data journalling, | ||
2121 | * but eventually maybe we'll lift this | ||
2122 | * restriction. | ||
2123 | */ | ||
2124 | if (unlikely(journal_data && PageChecked(page))) | ||
2125 | err = __ext4_journalled_writepage(page, len); | ||
2126 | else | ||
2127 | err = ext4_bio_write_page(&io_submit, page, | ||
2128 | len, mpd->wbc); | ||
2129 | |||
2130 | if (!err) | ||
2131 | mpd->pages_written++; | ||
2132 | /* | ||
2133 | * In error case, we have to continue because | ||
2134 | * remaining pages are still locked | ||
2135 | */ | ||
2136 | if (ret == 0) | ||
2137 | ret = err; | ||
2106 | } | 2138 | } |
2107 | pagevec_release(&pvec); | 2139 | pagevec_release(&pvec); |
2108 | } | 2140 | } |
2141 | ext4_io_submit(&io_submit); | ||
2142 | return ret; | ||
2109 | } | 2143 | } |
2110 | 2144 | ||
2111 | |||
2112 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2145 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
2113 | sector_t logical, long blk_cnt) | 2146 | sector_t logical, long blk_cnt) |
2114 | { | 2147 | { |
@@ -2160,41 +2193,38 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2160 | } | 2193 | } |
2161 | 2194 | ||
2162 | /* | 2195 | /* |
2163 | * mpage_da_map_blocks - go through given space | 2196 | * mpage_da_map_and_submit - go through given space, map them |
2197 | * if necessary, and then submit them for I/O | ||
2164 | * | 2198 | * |
2165 | * @mpd - bh describing space | 2199 | * @mpd - bh describing space |
2166 | * | 2200 | * |
2167 | * The function skips space we know is already mapped to disk blocks. | 2201 | * The function skips space we know is already mapped to disk blocks. |
2168 | * | 2202 | * |
2169 | */ | 2203 | */ |
2170 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2204 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) |
2171 | { | 2205 | { |
2172 | int err, blks, get_blocks_flags; | 2206 | int err, blks, get_blocks_flags; |
2173 | struct ext4_map_blocks map; | 2207 | struct ext4_map_blocks map, *mapp = NULL; |
2174 | sector_t next = mpd->b_blocknr; | 2208 | sector_t next = mpd->b_blocknr; |
2175 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2209 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2176 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2210 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
2177 | handle_t *handle = NULL; | 2211 | handle_t *handle = NULL; |
2178 | 2212 | ||
2179 | /* | 2213 | /* |
2180 | * We consider only non-mapped and non-allocated blocks | 2214 | * If the blocks are mapped already, or we couldn't accumulate |
2181 | */ | 2215 | * any blocks, then proceed immediately to the submission stage. |
2182 | if ((mpd->b_state & (1 << BH_Mapped)) && | ||
2183 | !(mpd->b_state & (1 << BH_Delay)) && | ||
2184 | !(mpd->b_state & (1 << BH_Unwritten))) | ||
2185 | return 0; | ||
2186 | |||
2187 | /* | ||
2188 | * If we didn't accumulate anything to write simply return | ||
2189 | */ | 2216 | */ |
2190 | if (!mpd->b_size) | 2217 | if ((mpd->b_size == 0) || |
2191 | return 0; | 2218 | ((mpd->b_state & (1 << BH_Mapped)) && |
2219 | !(mpd->b_state & (1 << BH_Delay)) && | ||
2220 | !(mpd->b_state & (1 << BH_Unwritten)))) | ||
2221 | goto submit_io; | ||
2192 | 2222 | ||
2193 | handle = ext4_journal_current_handle(); | 2223 | handle = ext4_journal_current_handle(); |
2194 | BUG_ON(!handle); | 2224 | BUG_ON(!handle); |
2195 | 2225 | ||
2196 | /* | 2226 | /* |
2197 | * Call ext4_get_blocks() to allocate any delayed allocation | 2227 | * Call ext4_map_blocks() to allocate any delayed allocation |
2198 | * blocks, or to convert an uninitialized extent to be | 2228 | * blocks, or to convert an uninitialized extent to be |
2199 | * initialized (in the case where we have written into | 2229 | * initialized (in the case where we have written into |
2200 | * one or more preallocated blocks). | 2230 | * one or more preallocated blocks). |
@@ -2203,7 +2233,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2203 | * indicate that we are on the delayed allocation path. This | 2233 | * indicate that we are on the delayed allocation path. This |
2204 | * affects functions in many different parts of the allocation | 2234 | * affects functions in many different parts of the allocation |
2205 | * call path. This flag exists primarily because we don't | 2235 | * call path. This flag exists primarily because we don't |
2206 | * want to change *many* call functions, so ext4_get_blocks() | 2236 | * want to change *many* call functions, so ext4_map_blocks() |
2207 | * will set the magic i_delalloc_reserved_flag once the | 2237 | * will set the magic i_delalloc_reserved_flag once the |
2208 | * inode's allocation semaphore is taken. | 2238 | * inode's allocation semaphore is taken. |
2209 | * | 2239 | * |
@@ -2221,19 +2251,22 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2221 | 2251 | ||
2222 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | 2252 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
2223 | if (blks < 0) { | 2253 | if (blks < 0) { |
2254 | struct super_block *sb = mpd->inode->i_sb; | ||
2255 | |||
2224 | err = blks; | 2256 | err = blks; |
2225 | /* | 2257 | /* |
2226 | * If get block returns with error we simply | 2258 | * If get block returns EAGAIN or ENOSPC and there |
2227 | * return. Later writepage will redirty the page and | 2259 | * appears to be free blocks we will call |
2228 | * writepages will find the dirty page again | 2260 | * ext4_writepage() for all of the pages which will |
2261 | * just redirty the pages. | ||
2229 | */ | 2262 | */ |
2230 | if (err == -EAGAIN) | 2263 | if (err == -EAGAIN) |
2231 | return 0; | 2264 | goto submit_io; |
2232 | 2265 | ||
2233 | if (err == -ENOSPC && | 2266 | if (err == -ENOSPC && |
2234 | ext4_count_free_blocks(mpd->inode->i_sb)) { | 2267 | ext4_count_free_blocks(sb)) { |
2235 | mpd->retval = err; | 2268 | mpd->retval = err; |
2236 | return 0; | 2269 | goto submit_io; |
2237 | } | 2270 | } |
2238 | 2271 | ||
2239 | /* | 2272 | /* |
@@ -2243,24 +2276,26 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2243 | * writepage and writepages will again try to write | 2276 | * writepage and writepages will again try to write |
2244 | * the same. | 2277 | * the same. |
2245 | */ | 2278 | */ |
2246 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, | 2279 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { |
2247 | "delayed block allocation failed for inode %lu at " | 2280 | ext4_msg(sb, KERN_CRIT, |
2248 | "logical offset %llu with max blocks %zd with " | 2281 | "delayed block allocation failed for inode %lu " |
2249 | "error %d", mpd->inode->i_ino, | 2282 | "at logical offset %llu with max blocks %zd " |
2250 | (unsigned long long) next, | 2283 | "with error %d", mpd->inode->i_ino, |
2251 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2284 | (unsigned long long) next, |
2252 | printk(KERN_CRIT "This should not happen!! " | 2285 | mpd->b_size >> mpd->inode->i_blkbits, err); |
2253 | "Data will be lost\n"); | 2286 | ext4_msg(sb, KERN_CRIT, |
2254 | if (err == -ENOSPC) { | 2287 | "This should not happen!! Data will be lost\n"); |
2255 | ext4_print_free_blocks(mpd->inode); | 2288 | if (err == -ENOSPC) |
2289 | ext4_print_free_blocks(mpd->inode); | ||
2256 | } | 2290 | } |
2257 | /* invalidate all the pages */ | 2291 | /* invalidate all the pages */ |
2258 | ext4_da_block_invalidatepages(mpd, next, | 2292 | ext4_da_block_invalidatepages(mpd, next, |
2259 | mpd->b_size >> mpd->inode->i_blkbits); | 2293 | mpd->b_size >> mpd->inode->i_blkbits); |
2260 | return err; | 2294 | return; |
2261 | } | 2295 | } |
2262 | BUG_ON(blks == 0); | 2296 | BUG_ON(blks == 0); |
2263 | 2297 | ||
2298 | mapp = ↦ | ||
2264 | if (map.m_flags & EXT4_MAP_NEW) { | 2299 | if (map.m_flags & EXT4_MAP_NEW) { |
2265 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | 2300 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; |
2266 | int i; | 2301 | int i; |
@@ -2269,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2269 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 2304 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2270 | } | 2305 | } |
2271 | 2306 | ||
2272 | /* | ||
2273 | * If blocks are delayed marked, we need to | ||
2274 | * put actual blocknr and drop delayed bit | ||
2275 | */ | ||
2276 | if ((mpd->b_state & (1 << BH_Delay)) || | ||
2277 | (mpd->b_state & (1 << BH_Unwritten))) | ||
2278 | mpage_put_bnr_to_bhs(mpd, &map); | ||
2279 | |||
2280 | if (ext4_should_order_data(mpd->inode)) { | 2307 | if (ext4_should_order_data(mpd->inode)) { |
2281 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2308 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
2282 | if (err) | 2309 | if (err) |
2283 | return err; | 2310 | /* This only happens if the journal is aborted */ |
2311 | return; | ||
2284 | } | 2312 | } |
2285 | 2313 | ||
2286 | /* | 2314 | /* |
@@ -2291,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2291 | disksize = i_size_read(mpd->inode); | 2319 | disksize = i_size_read(mpd->inode); |
2292 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | 2320 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { |
2293 | ext4_update_i_disksize(mpd->inode, disksize); | 2321 | ext4_update_i_disksize(mpd->inode, disksize); |
2294 | return ext4_mark_inode_dirty(handle, mpd->inode); | 2322 | err = ext4_mark_inode_dirty(handle, mpd->inode); |
2323 | if (err) | ||
2324 | ext4_error(mpd->inode->i_sb, | ||
2325 | "Failed to mark inode %lu dirty", | ||
2326 | mpd->inode->i_ino); | ||
2295 | } | 2327 | } |
2296 | 2328 | ||
2297 | return 0; | 2329 | submit_io: |
2330 | mpage_da_submit_io(mpd, mapp); | ||
2331 | mpd->io_done = 1; | ||
2298 | } | 2332 | } |
2299 | 2333 | ||
2300 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 2334 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -2320,7 +2354,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
2320 | * XXX Don't go larger than mballoc is willing to allocate | 2354 | * XXX Don't go larger than mballoc is willing to allocate |
2321 | * This is a stopgap solution. We eventually need to fold | 2355 | * This is a stopgap solution. We eventually need to fold |
2322 | * mpage_da_submit_io() into this function and then call | 2356 | * mpage_da_submit_io() into this function and then call |
2323 | * ext4_get_blocks() multiple times in a loop | 2357 | * ext4_map_blocks() multiple times in a loop |
2324 | */ | 2358 | */ |
2325 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | 2359 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) |
2326 | goto flush_it; | 2360 | goto flush_it; |
@@ -2371,9 +2405,7 @@ flush_it: | |||
2371 | * We couldn't merge the block to our extent, so we | 2405 | * We couldn't merge the block to our extent, so we |
2372 | * need to flush current extent and start new one | 2406 | * need to flush current extent and start new one |
2373 | */ | 2407 | */ |
2374 | if (mpage_da_map_blocks(mpd) == 0) | 2408 | mpage_da_map_and_submit(mpd); |
2375 | mpage_da_submit_io(mpd); | ||
2376 | mpd->io_done = 1; | ||
2377 | return; | 2409 | return; |
2378 | } | 2410 | } |
2379 | 2411 | ||
@@ -2392,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2392 | * The function finds extents of pages and scan them for all blocks. | 2424 | * The function finds extents of pages and scan them for all blocks. |
2393 | */ | 2425 | */ |
2394 | static int __mpage_da_writepage(struct page *page, | 2426 | static int __mpage_da_writepage(struct page *page, |
2395 | struct writeback_control *wbc, void *data) | 2427 | struct writeback_control *wbc, |
2428 | struct mpage_da_data *mpd) | ||
2396 | { | 2429 | { |
2397 | struct mpage_da_data *mpd = data; | ||
2398 | struct inode *inode = mpd->inode; | 2430 | struct inode *inode = mpd->inode; |
2399 | struct buffer_head *bh, *head; | 2431 | struct buffer_head *bh, *head; |
2400 | sector_t logical; | 2432 | sector_t logical; |
@@ -2405,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page, | |||
2405 | if (mpd->next_page != page->index) { | 2437 | if (mpd->next_page != page->index) { |
2406 | /* | 2438 | /* |
2407 | * Nope, we can't. So, we map non-allocated blocks | 2439 | * Nope, we can't. So, we map non-allocated blocks |
2408 | * and start IO on them using writepage() | 2440 | * and start IO on them |
2409 | */ | 2441 | */ |
2410 | if (mpd->next_page != mpd->first_page) { | 2442 | if (mpd->next_page != mpd->first_page) { |
2411 | if (mpage_da_map_blocks(mpd) == 0) | 2443 | mpage_da_map_and_submit(mpd); |
2412 | mpage_da_submit_io(mpd); | ||
2413 | /* | 2444 | /* |
2414 | * skip rest of the page in the page_vec | 2445 | * skip rest of the page in the page_vec |
2415 | */ | 2446 | */ |
2416 | mpd->io_done = 1; | ||
2417 | redirty_page_for_writepage(wbc, page); | 2447 | redirty_page_for_writepage(wbc, page); |
2418 | unlock_page(page); | 2448 | unlock_page(page); |
2419 | return MPAGE_DA_EXTENT_TAIL; | 2449 | return MPAGE_DA_EXTENT_TAIL; |
@@ -2520,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2520 | if (buffer_delay(bh)) | 2550 | if (buffer_delay(bh)) |
2521 | return 0; /* Not sure this could or should happen */ | 2551 | return 0; /* Not sure this could or should happen */ |
2522 | /* | 2552 | /* |
2523 | * XXX: __block_prepare_write() unmaps passed block, | 2553 | * XXX: __block_write_begin() unmaps passed block, is it OK? |
2524 | * is it OK? | ||
2525 | */ | 2554 | */ |
2526 | ret = ext4_da_reserve_space(inode, iblock); | 2555 | ret = ext4_da_reserve_space(inode, iblock); |
2527 | if (ret) | 2556 | if (ret) |
@@ -2553,18 +2582,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2553 | /* | 2582 | /* |
2554 | * This function is used as a standard get_block_t calback function | 2583 | * This function is used as a standard get_block_t calback function |
2555 | * when there is no desire to allocate any blocks. It is used as a | 2584 | * when there is no desire to allocate any blocks. It is used as a |
2556 | * callback function for block_prepare_write(), nobh_writepage(), and | 2585 | * callback function for block_write_begin() and block_write_full_page(). |
2557 | * block_write_full_page(). These functions should only try to map a | 2586 | * These functions should only try to map a single block at a time. |
2558 | * single block at a time. | ||
2559 | * | 2587 | * |
2560 | * Since this function doesn't do block allocations even if the caller | 2588 | * Since this function doesn't do block allocations even if the caller |
2561 | * requests it by passing in create=1, it is critically important that | 2589 | * requests it by passing in create=1, it is critically important that |
2562 | * any caller checks to make sure that any buffer heads are returned | 2590 | * any caller checks to make sure that any buffer heads are returned |
2563 | * by this function are either all already mapped or marked for | 2591 | * by this function are either all already mapped or marked for |
2564 | * delayed allocation before calling nobh_writepage() or | 2592 | * delayed allocation before calling block_write_full_page(). Otherwise, |
2565 | * block_write_full_page(). Otherwise, b_blocknr could be left | 2593 | * b_blocknr could be left unitialized, and the page write functions will |
2566 | * unitialized, and the page write functions will be taken by | 2594 | * be taken by surprise. |
2567 | * surprise. | ||
2568 | */ | 2595 | */ |
2569 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2596 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
2570 | struct buffer_head *bh_result, int create) | 2597 | struct buffer_head *bh_result, int create) |
@@ -2595,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2595 | int ret = 0; | 2622 | int ret = 0; |
2596 | int err; | 2623 | int err; |
2597 | 2624 | ||
2625 | ClearPageChecked(page); | ||
2598 | page_bufs = page_buffers(page); | 2626 | page_bufs = page_buffers(page); |
2599 | BUG_ON(!page_bufs); | 2627 | BUG_ON(!page_bufs); |
2600 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 2628 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
@@ -2672,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2672 | static int ext4_writepage(struct page *page, | 2700 | static int ext4_writepage(struct page *page, |
2673 | struct writeback_control *wbc) | 2701 | struct writeback_control *wbc) |
2674 | { | 2702 | { |
2675 | int ret = 0; | 2703 | int ret = 0, commit_write = 0; |
2676 | loff_t size; | 2704 | loff_t size; |
2677 | unsigned int len; | 2705 | unsigned int len; |
2678 | struct buffer_head *page_bufs = NULL; | 2706 | struct buffer_head *page_bufs = NULL; |
@@ -2685,73 +2713,46 @@ static int ext4_writepage(struct page *page, | |||
2685 | else | 2713 | else |
2686 | len = PAGE_CACHE_SIZE; | 2714 | len = PAGE_CACHE_SIZE; |
2687 | 2715 | ||
2688 | if (page_has_buffers(page)) { | 2716 | /* |
2689 | page_bufs = page_buffers(page); | 2717 | * If the page does not have buffers (for whatever reason), |
2690 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2718 | * try to create them using __block_write_begin. If this |
2691 | ext4_bh_delay_or_unwritten)) { | 2719 | * fails, redirty the page and move on. |
2692 | /* | 2720 | */ |
2693 | * We don't want to do block allocation | 2721 | if (!page_buffers(page)) { |
2694 | * So redirty the page and return | 2722 | if (__block_write_begin(page, 0, len, |
2695 | * We may reach here when we do a journal commit | 2723 | noalloc_get_block_write)) { |
2696 | * via journal_submit_inode_data_buffers. | 2724 | redirty_page: |
2697 | * If we don't have mapping block we just ignore | ||
2698 | * them. We can also reach here via shrink_page_list | ||
2699 | */ | ||
2700 | redirty_page_for_writepage(wbc, page); | 2725 | redirty_page_for_writepage(wbc, page); |
2701 | unlock_page(page); | 2726 | unlock_page(page); |
2702 | return 0; | 2727 | return 0; |
2703 | } | 2728 | } |
2704 | } else { | 2729 | commit_write = 1; |
2730 | } | ||
2731 | page_bufs = page_buffers(page); | ||
2732 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2733 | ext4_bh_delay_or_unwritten)) { | ||
2705 | /* | 2734 | /* |
2706 | * The test for page_has_buffers() is subtle: | 2735 | * We don't want to do block allocation So redirty the |
2707 | * We know the page is dirty but it lost buffers. That means | 2736 | * page and return We may reach here when we do a |
2708 | * that at some moment in time after write_begin()/write_end() | 2737 | * journal commit via |
2709 | * has been called all buffers have been clean and thus they | 2738 | * journal_submit_inode_data_buffers. If we don't |
2710 | * must have been written at least once. So they are all | 2739 | * have mapping block we just ignore them. We can also |
2711 | * mapped and we can happily proceed with mapping them | 2740 | * reach here via shrink_page_list |
2712 | * and writing the page. | ||
2713 | * | ||
2714 | * Try to initialize the buffer_heads and check whether | ||
2715 | * all are mapped and non delay. We don't want to | ||
2716 | * do block allocation here. | ||
2717 | */ | 2741 | */ |
2718 | ret = block_prepare_write(page, 0, len, | 2742 | goto redirty_page; |
2719 | noalloc_get_block_write); | 2743 | } |
2720 | if (!ret) { | 2744 | if (commit_write) |
2721 | page_bufs = page_buffers(page); | ||
2722 | /* check whether all are mapped and non delay */ | ||
2723 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2724 | ext4_bh_delay_or_unwritten)) { | ||
2725 | redirty_page_for_writepage(wbc, page); | ||
2726 | unlock_page(page); | ||
2727 | return 0; | ||
2728 | } | ||
2729 | } else { | ||
2730 | /* | ||
2731 | * We can't do block allocation here | ||
2732 | * so just redity the page and unlock | ||
2733 | * and return | ||
2734 | */ | ||
2735 | redirty_page_for_writepage(wbc, page); | ||
2736 | unlock_page(page); | ||
2737 | return 0; | ||
2738 | } | ||
2739 | /* now mark the buffer_heads as dirty and uptodate */ | 2745 | /* now mark the buffer_heads as dirty and uptodate */ |
2740 | block_commit_write(page, 0, len); | 2746 | block_commit_write(page, 0, len); |
2741 | } | ||
2742 | 2747 | ||
2743 | if (PageChecked(page) && ext4_should_journal_data(inode)) { | 2748 | if (PageChecked(page) && ext4_should_journal_data(inode)) |
2744 | /* | 2749 | /* |
2745 | * It's mmapped pagecache. Add buffers and journal it. There | 2750 | * It's mmapped pagecache. Add buffers and journal it. There |
2746 | * doesn't seem much point in redirtying the page here. | 2751 | * doesn't seem much point in redirtying the page here. |
2747 | */ | 2752 | */ |
2748 | ClearPageChecked(page); | ||
2749 | return __ext4_journalled_writepage(page, len); | 2753 | return __ext4_journalled_writepage(page, len); |
2750 | } | ||
2751 | 2754 | ||
2752 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2755 | if (buffer_uninit(page_bufs)) { |
2753 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | ||
2754 | else if (page_bufs && buffer_uninit(page_bufs)) { | ||
2755 | ext4_set_bh_endio(page_bufs, inode); | 2756 | ext4_set_bh_endio(page_bufs, inode); |
2756 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2757 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2757 | wbc, ext4_end_io_buffer_write); | 2758 | wbc, ext4_end_io_buffer_write); |
@@ -2798,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2798 | */ | 2799 | */ |
2799 | static int write_cache_pages_da(struct address_space *mapping, | 2800 | static int write_cache_pages_da(struct address_space *mapping, |
2800 | struct writeback_control *wbc, | 2801 | struct writeback_control *wbc, |
2801 | struct mpage_da_data *mpd) | 2802 | struct mpage_da_data *mpd, |
2803 | pgoff_t *done_index) | ||
2802 | { | 2804 | { |
2803 | int ret = 0; | 2805 | int ret = 0; |
2804 | int done = 0; | 2806 | int done = 0; |
2805 | struct pagevec pvec; | 2807 | struct pagevec pvec; |
2806 | int nr_pages; | 2808 | unsigned nr_pages; |
2807 | pgoff_t index; | 2809 | pgoff_t index; |
2808 | pgoff_t end; /* Inclusive */ | 2810 | pgoff_t end; /* Inclusive */ |
2809 | long nr_to_write = wbc->nr_to_write; | 2811 | long nr_to_write = wbc->nr_to_write; |
2812 | int tag; | ||
2810 | 2813 | ||
2811 | pagevec_init(&pvec, 0); | 2814 | pagevec_init(&pvec, 0); |
2812 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2815 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2813 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2816 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2814 | 2817 | ||
2818 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2819 | tag = PAGECACHE_TAG_TOWRITE; | ||
2820 | else | ||
2821 | tag = PAGECACHE_TAG_DIRTY; | ||
2822 | |||
2823 | *done_index = index; | ||
2815 | while (!done && (index <= end)) { | 2824 | while (!done && (index <= end)) { |
2816 | int i; | 2825 | int i; |
2817 | 2826 | ||
2818 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2827 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2819 | PAGECACHE_TAG_DIRTY, | ||
2820 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2828 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2821 | if (nr_pages == 0) | 2829 | if (nr_pages == 0) |
2822 | break; | 2830 | break; |
@@ -2836,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2836 | break; | 2844 | break; |
2837 | } | 2845 | } |
2838 | 2846 | ||
2847 | *done_index = page->index + 1; | ||
2848 | |||
2839 | lock_page(page); | 2849 | lock_page(page); |
2840 | 2850 | ||
2841 | /* | 2851 | /* |
@@ -2921,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2921 | long desired_nr_to_write, nr_to_writebump = 0; | 2931 | long desired_nr_to_write, nr_to_writebump = 0; |
2922 | loff_t range_start = wbc->range_start; | 2932 | loff_t range_start = wbc->range_start; |
2923 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2933 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2934 | pgoff_t done_index = 0; | ||
2935 | pgoff_t end; | ||
2924 | 2936 | ||
2925 | trace_ext4_da_writepages(inode, wbc); | 2937 | trace_ext4_da_writepages(inode, wbc); |
2926 | 2938 | ||
@@ -2956,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2956 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2968 | wbc->range_start = index << PAGE_CACHE_SHIFT; |
2957 | wbc->range_end = LLONG_MAX; | 2969 | wbc->range_end = LLONG_MAX; |
2958 | wbc->range_cyclic = 0; | 2970 | wbc->range_cyclic = 0; |
2959 | } else | 2971 | end = -1; |
2972 | } else { | ||
2960 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2973 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2974 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2975 | } | ||
2961 | 2976 | ||
2962 | /* | 2977 | /* |
2963 | * This works around two forms of stupidity. The first is in | 2978 | * This works around two forms of stupidity. The first is in |
@@ -2976,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2976 | * sbi->max_writeback_mb_bump whichever is smaller. | 2991 | * sbi->max_writeback_mb_bump whichever is smaller. |
2977 | */ | 2992 | */ |
2978 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | 2993 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); |
2979 | if (!range_cyclic && range_whole) | 2994 | if (!range_cyclic && range_whole) { |
2980 | desired_nr_to_write = wbc->nr_to_write * 8; | 2995 | if (wbc->nr_to_write == LONG_MAX) |
2981 | else | 2996 | desired_nr_to_write = wbc->nr_to_write; |
2997 | else | ||
2998 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2999 | } else | ||
2982 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | 3000 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, |
2983 | max_pages); | 3001 | max_pages); |
2984 | if (desired_nr_to_write > max_pages) | 3002 | if (desired_nr_to_write > max_pages) |
@@ -2995,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2995 | pages_skipped = wbc->pages_skipped; | 3013 | pages_skipped = wbc->pages_skipped; |
2996 | 3014 | ||
2997 | retry: | 3015 | retry: |
3016 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
3017 | tag_pages_for_writeback(mapping, index, end); | ||
3018 | |||
2998 | while (!ret && wbc->nr_to_write > 0) { | 3019 | while (!ret && wbc->nr_to_write > 0) { |
2999 | 3020 | ||
3000 | /* | 3021 | /* |
@@ -3033,16 +3054,14 @@ retry: | |||
3033 | mpd.io_done = 0; | 3054 | mpd.io_done = 0; |
3034 | mpd.pages_written = 0; | 3055 | mpd.pages_written = 0; |
3035 | mpd.retval = 0; | 3056 | mpd.retval = 0; |
3036 | ret = write_cache_pages_da(mapping, wbc, &mpd); | 3057 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3037 | /* | 3058 | /* |
3038 | * If we have a contiguous extent of pages and we | 3059 | * If we have a contiguous extent of pages and we |
3039 | * haven't done the I/O yet, map the blocks and submit | 3060 | * haven't done the I/O yet, map the blocks and submit |
3040 | * them for I/O. | 3061 | * them for I/O. |
3041 | */ | 3062 | */ |
3042 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 3063 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
3043 | if (mpage_da_map_blocks(&mpd) == 0) | 3064 | mpage_da_map_and_submit(&mpd); |
3044 | mpage_da_submit_io(&mpd); | ||
3045 | mpd.io_done = 1; | ||
3046 | ret = MPAGE_DA_EXTENT_TAIL; | 3065 | ret = MPAGE_DA_EXTENT_TAIL; |
3047 | } | 3066 | } |
3048 | trace_ext4_da_write_pages(inode, &mpd); | 3067 | trace_ext4_da_write_pages(inode, &mpd); |
@@ -3089,14 +3108,13 @@ retry: | |||
3089 | __func__, wbc->nr_to_write, ret); | 3108 | __func__, wbc->nr_to_write, ret); |
3090 | 3109 | ||
3091 | /* Update index */ | 3110 | /* Update index */ |
3092 | index += pages_written; | ||
3093 | wbc->range_cyclic = range_cyclic; | 3111 | wbc->range_cyclic = range_cyclic; |
3094 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 3112 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
3095 | /* | 3113 | /* |
3096 | * set the writeback_index so that range_cyclic | 3114 | * set the writeback_index so that range_cyclic |
3097 | * mode will write it back later | 3115 | * mode will write it back later |
3098 | */ | 3116 | */ |
3099 | mapping->writeback_index = index; | 3117 | mapping->writeback_index = done_index; |
3100 | 3118 | ||
3101 | out_writepages: | 3119 | out_writepages: |
3102 | wbc->nr_to_write -= nr_to_writebump; | 3120 | wbc->nr_to_write -= nr_to_writebump; |
@@ -3146,13 +3164,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3146 | int ret, retries = 0; | 3164 | int ret, retries = 0; |
3147 | struct page *page; | 3165 | struct page *page; |
3148 | pgoff_t index; | 3166 | pgoff_t index; |
3149 | unsigned from, to; | ||
3150 | struct inode *inode = mapping->host; | 3167 | struct inode *inode = mapping->host; |
3151 | handle_t *handle; | 3168 | handle_t *handle; |
3152 | 3169 | ||
3153 | index = pos >> PAGE_CACHE_SHIFT; | 3170 | index = pos >> PAGE_CACHE_SHIFT; |
3154 | from = pos & (PAGE_CACHE_SIZE - 1); | ||
3155 | to = from + len; | ||
3156 | 3171 | ||
3157 | if (ext4_nonda_switch(inode->i_sb)) { | 3172 | if (ext4_nonda_switch(inode->i_sb)) { |
3158 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | 3173 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; |
@@ -3185,8 +3200,7 @@ retry: | |||
3185 | } | 3200 | } |
3186 | *pagep = page; | 3201 | *pagep = page; |
3187 | 3202 | ||
3188 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 3203 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
3189 | ext4_da_get_block_prep); | ||
3190 | if (ret < 0) { | 3204 | if (ret < 0) { |
3191 | unlock_page(page); | 3205 | unlock_page(page); |
3192 | ext4_journal_stop(handle); | 3206 | ext4_journal_stop(handle); |
@@ -3435,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3435 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3449 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3436 | } | 3450 | } |
3437 | 3451 | ||
3438 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3439 | { | ||
3440 | BUG_ON(!io); | ||
3441 | if (io->page) | ||
3442 | put_page(io->page); | ||
3443 | iput(io->inode); | ||
3444 | kfree(io); | ||
3445 | } | ||
3446 | |||
3447 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3452 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3448 | { | 3453 | { |
3449 | struct buffer_head *head, *bh; | 3454 | struct buffer_head *head, *bh; |
@@ -3545,15 +3550,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
3545 | 3550 | ||
3546 | retry: | 3551 | retry: |
3547 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 3552 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
3548 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 3553 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3549 | inode->i_sb->s_bdev, iov, | 3554 | inode->i_sb->s_bdev, iov, |
3550 | offset, nr_segs, | 3555 | offset, nr_segs, |
3551 | ext4_get_block, NULL); | 3556 | ext4_get_block, NULL, NULL, 0); |
3552 | else | 3557 | else { |
3553 | ret = blockdev_direct_IO(rw, iocb, inode, | 3558 | ret = blockdev_direct_IO(rw, iocb, inode, |
3554 | inode->i_sb->s_bdev, iov, | 3559 | inode->i_sb->s_bdev, iov, |
3555 | offset, nr_segs, | 3560 | offset, nr_segs, |
3556 | ext4_get_block, NULL); | 3561 | ext4_get_block, NULL); |
3562 | |||
3563 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
3564 | loff_t isize = i_size_read(inode); | ||
3565 | loff_t end = offset + iov_length(iov, nr_segs); | ||
3566 | |||
3567 | if (end > isize) | ||
3568 | vmtruncate(inode, isize); | ||
3569 | } | ||
3570 | } | ||
3557 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3571 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3558 | goto retry; | 3572 | goto retry; |
3559 | 3573 | ||
@@ -3611,171 +3625,9 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
3611 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 3625 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3612 | } | 3626 | } |
3613 | 3627 | ||
3614 | static void dump_completed_IO(struct inode * inode) | ||
3615 | { | ||
3616 | #ifdef EXT4_DEBUG | ||
3617 | struct list_head *cur, *before, *after; | ||
3618 | ext4_io_end_t *io, *io0, *io1; | ||
3619 | unsigned long flags; | ||
3620 | |||
3621 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
3622 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
3623 | return; | ||
3624 | } | ||
3625 | |||
3626 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
3627 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3628 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3629 | cur = &io->list; | ||
3630 | before = cur->prev; | ||
3631 | io0 = container_of(before, ext4_io_end_t, list); | ||
3632 | after = cur->next; | ||
3633 | io1 = container_of(after, ext4_io_end_t, list); | ||
3634 | |||
3635 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
3636 | io, inode->i_ino, io0, io1); | ||
3637 | } | ||
3638 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3639 | #endif | ||
3640 | } | ||
3641 | |||
3642 | /* | ||
3643 | * check a range of space and convert unwritten extents to written. | ||
3644 | */ | ||
3645 | static int ext4_end_io_nolock(ext4_io_end_t *io) | ||
3646 | { | ||
3647 | struct inode *inode = io->inode; | ||
3648 | loff_t offset = io->offset; | ||
3649 | ssize_t size = io->size; | ||
3650 | int ret = 0; | ||
3651 | |||
3652 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
3653 | "list->prev 0x%p\n", | ||
3654 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3655 | |||
3656 | if (list_empty(&io->list)) | ||
3657 | return ret; | ||
3658 | |||
3659 | if (io->flag != EXT4_IO_UNWRITTEN) | ||
3660 | return ret; | ||
3661 | |||
3662 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3663 | if (ret < 0) { | ||
3664 | printk(KERN_EMERG "%s: failed to convert unwritten" | ||
3665 | "extents to written extents, error is %d" | ||
3666 | " io is still on inode %lu aio dio list\n", | ||
3667 | __func__, ret, inode->i_ino); | ||
3668 | return ret; | ||
3669 | } | ||
3670 | |||
3671 | /* clear the DIO AIO unwritten flag */ | ||
3672 | io->flag = 0; | ||
3673 | return ret; | ||
3674 | } | ||
3675 | |||
3676 | /* | ||
3677 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
3678 | */ | ||
3679 | static void ext4_end_io_work(struct work_struct *work) | ||
3680 | { | ||
3681 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3682 | struct inode *inode = io->inode; | ||
3683 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3684 | unsigned long flags; | ||
3685 | int ret; | ||
3686 | |||
3687 | mutex_lock(&inode->i_mutex); | ||
3688 | ret = ext4_end_io_nolock(io); | ||
3689 | if (ret < 0) { | ||
3690 | mutex_unlock(&inode->i_mutex); | ||
3691 | return; | ||
3692 | } | ||
3693 | |||
3694 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3695 | if (!list_empty(&io->list)) | ||
3696 | list_del_init(&io->list); | ||
3697 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3698 | mutex_unlock(&inode->i_mutex); | ||
3699 | ext4_free_io_end(io); | ||
3700 | } | ||
3701 | |||
3702 | /* | ||
3703 | * This function is called from ext4_sync_file(). | ||
3704 | * | ||
3705 | * When IO is completed, the work to convert unwritten extents to | ||
3706 | * written is queued on workqueue but may not get immediately | ||
3707 | * scheduled. When fsync is called, we need to ensure the | ||
3708 | * conversion is complete before fsync returns. | ||
3709 | * The inode keeps track of a list of pending/completed IO that | ||
3710 | * might needs to do the conversion. This function walks through | ||
3711 | * the list and convert the related unwritten extents for completed IO | ||
3712 | * to written. | ||
3713 | * The function return the number of pending IOs on success. | ||
3714 | */ | ||
3715 | int flush_completed_IO(struct inode *inode) | ||
3716 | { | ||
3717 | ext4_io_end_t *io; | ||
3718 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3719 | unsigned long flags; | ||
3720 | int ret = 0; | ||
3721 | int ret2 = 0; | ||
3722 | |||
3723 | if (list_empty(&ei->i_completed_io_list)) | ||
3724 | return ret; | ||
3725 | |||
3726 | dump_completed_IO(inode); | ||
3727 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3728 | while (!list_empty(&ei->i_completed_io_list)){ | ||
3729 | io = list_entry(ei->i_completed_io_list.next, | ||
3730 | ext4_io_end_t, list); | ||
3731 | /* | ||
3732 | * Calling ext4_end_io_nolock() to convert completed | ||
3733 | * IO to written. | ||
3734 | * | ||
3735 | * When ext4_sync_file() is called, run_queue() may already | ||
3736 | * about to flush the work corresponding to this io structure. | ||
3737 | * It will be upset if it founds the io structure related | ||
3738 | * to the work-to-be schedule is freed. | ||
3739 | * | ||
3740 | * Thus we need to keep the io structure still valid here after | ||
3741 | * convertion finished. The io structure has a flag to | ||
3742 | * avoid double converting from both fsync and background work | ||
3743 | * queue work. | ||
3744 | */ | ||
3745 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3746 | ret = ext4_end_io_nolock(io); | ||
3747 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3748 | if (ret < 0) | ||
3749 | ret2 = ret; | ||
3750 | else | ||
3751 | list_del_init(&io->list); | ||
3752 | } | ||
3753 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3754 | return (ret2 < 0) ? ret2 : 0; | ||
3755 | } | ||
3756 | |||
3757 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | ||
3758 | { | ||
3759 | ext4_io_end_t *io = NULL; | ||
3760 | |||
3761 | io = kmalloc(sizeof(*io), flags); | ||
3762 | |||
3763 | if (io) { | ||
3764 | igrab(inode); | ||
3765 | io->inode = inode; | ||
3766 | io->flag = 0; | ||
3767 | io->offset = 0; | ||
3768 | io->size = 0; | ||
3769 | io->page = NULL; | ||
3770 | INIT_WORK(&io->work, ext4_end_io_work); | ||
3771 | INIT_LIST_HEAD(&io->list); | ||
3772 | } | ||
3773 | |||
3774 | return io; | ||
3775 | } | ||
3776 | |||
3777 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3628 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3778 | ssize_t size, void *private) | 3629 | ssize_t size, void *private, int ret, |
3630 | bool is_async) | ||
3779 | { | 3631 | { |
3780 | ext4_io_end_t *io_end = iocb->private; | 3632 | ext4_io_end_t *io_end = iocb->private; |
3781 | struct workqueue_struct *wq; | 3633 | struct workqueue_struct *wq; |
@@ -3784,7 +3636,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3784 | 3636 | ||
3785 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3637 | /* if not async direct IO or dio with 0 bytes write, just return */ |
3786 | if (!io_end || !size) | 3638 | if (!io_end || !size) |
3787 | return; | 3639 | goto out; |
3788 | 3640 | ||
3789 | ext_debug("ext4_end_io_dio(): io_end 0x%p" | 3641 | ext_debug("ext4_end_io_dio(): io_end 0x%p" |
3790 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", | 3642 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", |
@@ -3792,25 +3644,31 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3792 | size); | 3644 | size); |
3793 | 3645 | ||
3794 | /* if not aio dio with unwritten extents, just free io and return */ | 3646 | /* if not aio dio with unwritten extents, just free io and return */ |
3795 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3647 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
3796 | ext4_free_io_end(io_end); | 3648 | ext4_free_io_end(io_end); |
3797 | iocb->private = NULL; | 3649 | iocb->private = NULL; |
3650 | out: | ||
3651 | if (is_async) | ||
3652 | aio_complete(iocb, ret, 0); | ||
3798 | return; | 3653 | return; |
3799 | } | 3654 | } |
3800 | 3655 | ||
3801 | io_end->offset = offset; | 3656 | io_end->offset = offset; |
3802 | io_end->size = size; | 3657 | io_end->size = size; |
3803 | io_end->flag = EXT4_IO_UNWRITTEN; | 3658 | if (is_async) { |
3659 | io_end->iocb = iocb; | ||
3660 | io_end->result = ret; | ||
3661 | } | ||
3804 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3662 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3805 | 3663 | ||
3806 | /* queue the work to convert unwritten extents to written */ | ||
3807 | queue_work(wq, &io_end->work); | ||
3808 | |||
3809 | /* Add the io_end to per-inode completed aio dio list*/ | 3664 | /* Add the io_end to per-inode completed aio dio list*/ |
3810 | ei = EXT4_I(io_end->inode); | 3665 | ei = EXT4_I(io_end->inode); |
3811 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 3666 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3812 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 3667 | list_add_tail(&io_end->list, &ei->i_completed_io_list); |
3813 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 3668 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3669 | |||
3670 | /* queue the work to convert unwritten extents to written */ | ||
3671 | queue_work(wq, &io_end->work); | ||
3814 | iocb->private = NULL; | 3672 | iocb->private = NULL; |
3815 | } | 3673 | } |
3816 | 3674 | ||
@@ -3831,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3831 | goto out; | 3689 | goto out; |
3832 | } | 3690 | } |
3833 | 3691 | ||
3834 | io_end->flag = EXT4_IO_UNWRITTEN; | 3692 | io_end->flag = EXT4_IO_END_UNWRITTEN; |
3835 | inode = io_end->inode; | 3693 | inode = io_end->inode; |
3836 | 3694 | ||
3837 | /* Add the io_end to per-inode completed io list*/ | 3695 | /* Add the io_end to per-inode completed io list*/ |
@@ -3937,7 +3795,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3937 | return -ENOMEM; | 3795 | return -ENOMEM; |
3938 | /* | 3796 | /* |
3939 | * we save the io structure for current async | 3797 | * we save the io structure for current async |
3940 | * direct IO, so that later ext4_get_blocks() | 3798 | * direct IO, so that later ext4_map_blocks() |
3941 | * could flag the io structure whether there | 3799 | * could flag the io structure whether there |
3942 | * is a unwritten extents needs to be converted | 3800 | * is a unwritten extents needs to be converted |
3943 | * when IO is completed. | 3801 | * when IO is completed. |
@@ -4128,17 +3986,6 @@ int ext4_block_truncate_page(handle_t *handle, | |||
4128 | length = blocksize - (offset & (blocksize - 1)); | 3986 | length = blocksize - (offset & (blocksize - 1)); |
4129 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 3987 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
4130 | 3988 | ||
4131 | /* | ||
4132 | * For "nobh" option, we can only work if we don't need to | ||
4133 | * read-in the page - otherwise we create buffers to do the IO. | ||
4134 | */ | ||
4135 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | ||
4136 | ext4_should_writeback_data(inode) && PageUptodate(page)) { | ||
4137 | zero_user(page, offset, length); | ||
4138 | set_page_dirty(page); | ||
4139 | goto unlock; | ||
4140 | } | ||
4141 | |||
4142 | if (!page_has_buffers(page)) | 3989 | if (!page_has_buffers(page)) |
4143 | create_empty_buffers(page, blocksize, 0); | 3990 | create_empty_buffers(page, blocksize, 0); |
4144 | 3991 | ||
@@ -4488,9 +4335,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4488 | * (should be rare). | 4335 | * (should be rare). |
4489 | */ | 4336 | */ |
4490 | if (!bh) { | 4337 | if (!bh) { |
4491 | EXT4_ERROR_INODE(inode, | 4338 | EXT4_ERROR_INODE_BLOCK(inode, nr, |
4492 | "Read failure block=%llu", | 4339 | "Read failure"); |
4493 | (unsigned long long) nr); | ||
4494 | continue; | 4340 | continue; |
4495 | } | 4341 | } |
4496 | 4342 | ||
@@ -4502,27 +4348,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4502 | depth); | 4348 | depth); |
4503 | 4349 | ||
4504 | /* | 4350 | /* |
4505 | * We've probably journalled the indirect block several | ||
4506 | * times during the truncate. But it's no longer | ||
4507 | * needed and we now drop it from the transaction via | ||
4508 | * jbd2_journal_revoke(). | ||
4509 | * | ||
4510 | * That's easy if it's exclusively part of this | ||
4511 | * transaction. But if it's part of the committing | ||
4512 | * transaction then jbd2_journal_forget() will simply | ||
4513 | * brelse() it. That means that if the underlying | ||
4514 | * block is reallocated in ext4_get_block(), | ||
4515 | * unmap_underlying_metadata() will find this block | ||
4516 | * and will try to get rid of it. damn, damn. | ||
4517 | * | ||
4518 | * If this block has already been committed to the | ||
4519 | * journal, a revoke record will be written. And | ||
4520 | * revoke records must be emitted *before* clearing | ||
4521 | * this block's bit in the bitmaps. | ||
4522 | */ | ||
4523 | ext4_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
4524 | |||
4525 | /* | ||
4526 | * Everything below this this pointer has been | 4351 | * Everything below this this pointer has been |
4527 | * released. Now let this top-of-subtree go. | 4352 | * released. Now let this top-of-subtree go. |
4528 | * | 4353 | * |
@@ -4546,8 +4371,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4546 | blocks_for_truncate(inode)); | 4371 | blocks_for_truncate(inode)); |
4547 | } | 4372 | } |
4548 | 4373 | ||
4374 | /* | ||
4375 | * The forget flag here is critical because if | ||
4376 | * we are journaling (and not doing data | ||
4377 | * journaling), we have to make sure a revoke | ||
4378 | * record is written to prevent the journal | ||
4379 | * replay from overwriting the (former) | ||
4380 | * indirect block if it gets reallocated as a | ||
4381 | * data block. This must happen in the same | ||
4382 | * transaction where the data blocks are | ||
4383 | * actually freed. | ||
4384 | */ | ||
4549 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4385 | ext4_free_blocks(handle, inode, 0, nr, 1, |
4550 | EXT4_FREE_BLOCKS_METADATA); | 4386 | EXT4_FREE_BLOCKS_METADATA| |
4387 | EXT4_FREE_BLOCKS_FORGET); | ||
4551 | 4388 | ||
4552 | if (parent_bh) { | 4389 | if (parent_bh) { |
4553 | /* | 4390 | /* |
@@ -4805,8 +4642,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4805 | 4642 | ||
4806 | bh = sb_getblk(sb, block); | 4643 | bh = sb_getblk(sb, block); |
4807 | if (!bh) { | 4644 | if (!bh) { |
4808 | EXT4_ERROR_INODE(inode, "unable to read inode block - " | 4645 | EXT4_ERROR_INODE_BLOCK(inode, block, |
4809 | "block %llu", block); | 4646 | "unable to read itable block"); |
4810 | return -EIO; | 4647 | return -EIO; |
4811 | } | 4648 | } |
4812 | if (!buffer_uptodate(bh)) { | 4649 | if (!buffer_uptodate(bh)) { |
@@ -4904,8 +4741,8 @@ make_io: | |||
4904 | submit_bh(READ_META, bh); | 4741 | submit_bh(READ_META, bh); |
4905 | wait_on_buffer(bh); | 4742 | wait_on_buffer(bh); |
4906 | if (!buffer_uptodate(bh)) { | 4743 | if (!buffer_uptodate(bh)) { |
4907 | EXT4_ERROR_INODE(inode, "unable to read inode " | 4744 | EXT4_ERROR_INODE_BLOCK(inode, block, |
4908 | "block %llu", block); | 4745 | "unable to read itable block"); |
4909 | brelse(bh); | 4746 | brelse(bh); |
4910 | return -EIO; | 4747 | return -EIO; |
4911 | } | 4748 | } |
@@ -4976,7 +4813,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | |||
4976 | /* we are using combined 48 bit field */ | 4813 | /* we are using combined 48 bit field */ |
4977 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | | 4814 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | |
4978 | le32_to_cpu(raw_inode->i_blocks_lo); | 4815 | le32_to_cpu(raw_inode->i_blocks_lo); |
4979 | if (ei->i_flags & EXT4_HUGE_FILE_FL) { | 4816 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { |
4980 | /* i_blocks represent file system block size */ | 4817 | /* i_blocks represent file system block size */ |
4981 | return i_blocks << (inode->i_blkbits - 9); | 4818 | return i_blocks << (inode->i_blkbits - 9); |
4982 | } else { | 4819 | } else { |
@@ -5072,7 +4909,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5072 | transaction_t *transaction; | 4909 | transaction_t *transaction; |
5073 | tid_t tid; | 4910 | tid_t tid; |
5074 | 4911 | ||
5075 | spin_lock(&journal->j_state_lock); | 4912 | read_lock(&journal->j_state_lock); |
5076 | if (journal->j_running_transaction) | 4913 | if (journal->j_running_transaction) |
5077 | transaction = journal->j_running_transaction; | 4914 | transaction = journal->j_running_transaction; |
5078 | else | 4915 | else |
@@ -5081,7 +4918,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5081 | tid = transaction->t_tid; | 4918 | tid = transaction->t_tid; |
5082 | else | 4919 | else |
5083 | tid = journal->j_commit_sequence; | 4920 | tid = journal->j_commit_sequence; |
5084 | spin_unlock(&journal->j_state_lock); | 4921 | read_unlock(&journal->j_state_lock); |
5085 | ei->i_sync_tid = tid; | 4922 | ei->i_sync_tid = tid; |
5086 | ei->i_datasync_tid = tid; | 4923 | ei->i_datasync_tid = tid; |
5087 | } | 4924 | } |
@@ -5126,7 +4963,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5126 | ei->i_file_acl); | 4963 | ei->i_file_acl); |
5127 | ret = -EIO; | 4964 | ret = -EIO; |
5128 | goto bad_inode; | 4965 | goto bad_inode; |
5129 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | 4966 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
5130 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4967 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
5131 | (S_ISLNK(inode->i_mode) && | 4968 | (S_ISLNK(inode->i_mode) && |
5132 | !ext4_inode_is_fast_symlink(inode))) | 4969 | !ext4_inode_is_fast_symlink(inode))) |
@@ -5406,9 +5243,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5406 | if (wbc->sync_mode == WB_SYNC_ALL) | 5243 | if (wbc->sync_mode == WB_SYNC_ALL) |
5407 | sync_dirty_buffer(iloc.bh); | 5244 | sync_dirty_buffer(iloc.bh); |
5408 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5245 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5409 | EXT4_ERROR_INODE(inode, | 5246 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, |
5410 | "IO error syncing inode (block=%llu)", | 5247 | "IO error syncing inode"); |
5411 | (unsigned long long) iloc.bh->b_blocknr); | ||
5412 | err = -EIO; | 5248 | err = -EIO; |
5413 | } | 5249 | } |
5414 | brelse(iloc.bh); | 5250 | brelse(iloc.bh); |
@@ -5444,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5444 | { | 5280 | { |
5445 | struct inode *inode = dentry->d_inode; | 5281 | struct inode *inode = dentry->d_inode; |
5446 | int error, rc = 0; | 5282 | int error, rc = 0; |
5283 | int orphan = 0; | ||
5447 | const unsigned int ia_valid = attr->ia_valid; | 5284 | const unsigned int ia_valid = attr->ia_valid; |
5448 | 5285 | ||
5449 | error = inode_change_ok(inode, attr); | 5286 | error = inode_change_ok(inode, attr); |
@@ -5483,10 +5320,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5483 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 5320 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
5484 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5321 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5485 | 5322 | ||
5486 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | 5323 | if (attr->ia_size > sbi->s_bitmap_maxbytes) |
5487 | error = -EFBIG; | 5324 | return -EFBIG; |
5488 | goto err_out; | ||
5489 | } | ||
5490 | } | 5325 | } |
5491 | } | 5326 | } |
5492 | 5327 | ||
@@ -5501,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5501 | error = PTR_ERR(handle); | 5336 | error = PTR_ERR(handle); |
5502 | goto err_out; | 5337 | goto err_out; |
5503 | } | 5338 | } |
5504 | 5339 | if (ext4_handle_valid(handle)) { | |
5505 | error = ext4_orphan_add(handle, inode); | 5340 | error = ext4_orphan_add(handle, inode); |
5341 | orphan = 1; | ||
5342 | } | ||
5506 | EXT4_I(inode)->i_disksize = attr->ia_size; | 5343 | EXT4_I(inode)->i_disksize = attr->ia_size; |
5507 | rc = ext4_mark_inode_dirty(handle, inode); | 5344 | rc = ext4_mark_inode_dirty(handle, inode); |
5508 | if (!error) | 5345 | if (!error) |
@@ -5520,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5520 | goto err_out; | 5357 | goto err_out; |
5521 | } | 5358 | } |
5522 | ext4_orphan_del(handle, inode); | 5359 | ext4_orphan_del(handle, inode); |
5360 | orphan = 0; | ||
5523 | ext4_journal_stop(handle); | 5361 | ext4_journal_stop(handle); |
5524 | goto err_out; | 5362 | goto err_out; |
5525 | } | 5363 | } |
@@ -5529,12 +5367,20 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5529 | ext4_truncate(inode); | 5367 | ext4_truncate(inode); |
5530 | } | 5368 | } |
5531 | 5369 | ||
5532 | rc = inode_setattr(inode, attr); | 5370 | if ((attr->ia_valid & ATTR_SIZE) && |
5371 | attr->ia_size != i_size_read(inode)) | ||
5372 | rc = vmtruncate(inode, attr->ia_size); | ||
5533 | 5373 | ||
5534 | /* If inode_setattr's call to ext4_truncate failed to get a | 5374 | if (!rc) { |
5535 | * transaction handle at all, we need to clean up the in-core | 5375 | setattr_copy(inode, attr); |
5536 | * orphan list manually. */ | 5376 | mark_inode_dirty(inode); |
5537 | if (inode->i_nlink) | 5377 | } |
5378 | |||
5379 | /* | ||
5380 | * If the call to ext4_truncate failed to get a transaction handle at | ||
5381 | * all, we need to clean up the in-core orphan list manually. | ||
5382 | */ | ||
5383 | if (orphan && inode->i_nlink) | ||
5538 | ext4_orphan_del(NULL, inode); | 5384 | ext4_orphan_del(NULL, inode); |
5539 | 5385 | ||
5540 | if (!rc && (ia_valid & ATTR_MODE)) | 5386 | if (!rc && (ia_valid & ATTR_MODE)) |
@@ -5617,7 +5463,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5617 | * | 5463 | * |
5618 | * Also account for superblock, inode, quota and xattr blocks | 5464 | * Also account for superblock, inode, quota and xattr blocks |
5619 | */ | 5465 | */ |
5620 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5466 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5621 | { | 5467 | { |
5622 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 5468 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5623 | int gdpblocks; | 5469 | int gdpblocks; |
@@ -5688,7 +5534,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
5688 | * Calculate the journal credits for a chunk of data modification. | 5534 | * Calculate the journal credits for a chunk of data modification. |
5689 | * | 5535 | * |
5690 | * This is called from DIO, fallocate or whoever calling | 5536 | * This is called from DIO, fallocate or whoever calling |
5691 | * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. | 5537 | * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. |
5692 | * | 5538 | * |
5693 | * journal buffers for data blocks are not included here, as DIO | 5539 | * journal buffers for data blocks are not included here, as DIO |
5694 | * and fallocate do no need to journal data buffers. | 5540 | * and fallocate do no need to journal data buffers. |
@@ -5754,7 +5600,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5754 | { | 5600 | { |
5755 | struct ext4_inode *raw_inode; | 5601 | struct ext4_inode *raw_inode; |
5756 | struct ext4_xattr_ibody_header *header; | 5602 | struct ext4_xattr_ibody_header *header; |
5757 | struct ext4_xattr_entry *entry; | ||
5758 | 5603 | ||
5759 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) | 5604 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
5760 | return 0; | 5605 | return 0; |
@@ -5762,7 +5607,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5762 | raw_inode = ext4_raw_inode(&iloc); | 5607 | raw_inode = ext4_raw_inode(&iloc); |
5763 | 5608 | ||
5764 | header = IHDR(inode, raw_inode); | 5609 | header = IHDR(inode, raw_inode); |
5765 | entry = IFIRST(header); | ||
5766 | 5610 | ||
5767 | /* No extended attributes present */ | 5611 | /* No extended attributes present */ |
5768 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || | 5612 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 12b3bc026a68..c58eba34724a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -338,6 +338,14 @@ | |||
338 | static struct kmem_cache *ext4_pspace_cachep; | 338 | static struct kmem_cache *ext4_pspace_cachep; |
339 | static struct kmem_cache *ext4_ac_cachep; | 339 | static struct kmem_cache *ext4_ac_cachep; |
340 | static struct kmem_cache *ext4_free_ext_cachep; | 340 | static struct kmem_cache *ext4_free_ext_cachep; |
341 | |||
342 | /* We create slab caches for groupinfo data structures based on the | ||
343 | * superblock block size. There will be one per mounted filesystem for | ||
344 | * each unique s_blocksize_bits */ | ||
345 | #define NR_GRPINFO_CACHES \ | ||
346 | (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1) | ||
347 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; | ||
348 | |||
341 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 349 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
342 | ext4_group_t group); | 350 | ext4_group_t group); |
343 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 351 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
@@ -446,10 +454,11 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
446 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 454 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
447 | blocknr += first + i; | 455 | blocknr += first + i; |
448 | ext4_grp_locked_error(sb, e4b->bd_group, | 456 | ext4_grp_locked_error(sb, e4b->bd_group, |
449 | __func__, "double-free of inode" | 457 | inode ? inode->i_ino : 0, |
450 | " %lu's block %llu(bit %u in group %u)", | 458 | blocknr, |
451 | inode ? inode->i_ino : 0, blocknr, | 459 | "freeing block already freed " |
452 | first + i, e4b->bd_group); | 460 | "(bit %u)", |
461 | first + i); | ||
453 | } | 462 | } |
454 | mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); | 463 | mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); |
455 | } | 464 | } |
@@ -712,9 +721,9 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
712 | grp->bb_fragments = fragments; | 721 | grp->bb_fragments = fragments; |
713 | 722 | ||
714 | if (free != grp->bb_free) { | 723 | if (free != grp->bb_free) { |
715 | ext4_grp_locked_error(sb, group, __func__, | 724 | ext4_grp_locked_error(sb, group, 0, 0, |
716 | "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", | 725 | "%u blocks in bitmap, %u in gd", |
717 | group, free, grp->bb_free); | 726 | free, grp->bb_free); |
718 | /* | 727 | /* |
719 | * If we intent to continue, we consider group descritor | 728 | * If we intent to continue, we consider group descritor |
720 | * corrupt and update bb_free using bitmap value | 729 | * corrupt and update bb_free using bitmap value |
@@ -938,6 +947,85 @@ out: | |||
938 | } | 947 | } |
939 | 948 | ||
940 | /* | 949 | /* |
950 | * lock the group_info alloc_sem of all the groups | ||
951 | * belonging to the same buddy cache page. This | ||
952 | * make sure other parallel operation on the buddy | ||
953 | * cache doesn't happen whild holding the buddy cache | ||
954 | * lock | ||
955 | */ | ||
956 | static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | ||
957 | ext4_group_t group) | ||
958 | { | ||
959 | int i; | ||
960 | int block, pnum; | ||
961 | int blocks_per_page; | ||
962 | int groups_per_page; | ||
963 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
964 | ext4_group_t first_group; | ||
965 | struct ext4_group_info *grp; | ||
966 | |||
967 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
968 | /* | ||
969 | * the buddy cache inode stores the block bitmap | ||
970 | * and buddy information in consecutive blocks. | ||
971 | * So for each group we need two blocks. | ||
972 | */ | ||
973 | block = group * 2; | ||
974 | pnum = block / blocks_per_page; | ||
975 | first_group = pnum * blocks_per_page / 2; | ||
976 | |||
977 | groups_per_page = blocks_per_page >> 1; | ||
978 | if (groups_per_page == 0) | ||
979 | groups_per_page = 1; | ||
980 | /* read all groups the page covers into the cache */ | ||
981 | for (i = 0; i < groups_per_page; i++) { | ||
982 | |||
983 | if ((first_group + i) >= ngroups) | ||
984 | break; | ||
985 | grp = ext4_get_group_info(sb, first_group + i); | ||
986 | /* take all groups write allocation | ||
987 | * semaphore. This make sure there is | ||
988 | * no block allocation going on in any | ||
989 | * of that groups | ||
990 | */ | ||
991 | down_write_nested(&grp->alloc_sem, i); | ||
992 | } | ||
993 | return i; | ||
994 | } | ||
995 | |||
996 | static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
997 | ext4_group_t group, int locked_group) | ||
998 | { | ||
999 | int i; | ||
1000 | int block, pnum; | ||
1001 | int blocks_per_page; | ||
1002 | ext4_group_t first_group; | ||
1003 | struct ext4_group_info *grp; | ||
1004 | |||
1005 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1006 | /* | ||
1007 | * the buddy cache inode stores the block bitmap | ||
1008 | * and buddy information in consecutive blocks. | ||
1009 | * So for each group we need two blocks. | ||
1010 | */ | ||
1011 | block = group * 2; | ||
1012 | pnum = block / blocks_per_page; | ||
1013 | first_group = pnum * blocks_per_page / 2; | ||
1014 | /* release locks on all the groups */ | ||
1015 | for (i = 0; i < locked_group; i++) { | ||
1016 | |||
1017 | grp = ext4_get_group_info(sb, first_group + i); | ||
1018 | /* take all groups write allocation | ||
1019 | * semaphore. This make sure there is | ||
1020 | * no block allocation going on in any | ||
1021 | * of that groups | ||
1022 | */ | ||
1023 | up_write(&grp->alloc_sem); | ||
1024 | } | ||
1025 | |||
1026 | } | ||
1027 | |||
1028 | /* | ||
941 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | 1029 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the |
942 | * block group lock of all groups for this page; do not hold the BG lock when | 1030 | * block group lock of all groups for this page; do not hold the BG lock when |
943 | * calling this routine! | 1031 | * calling this routine! |
@@ -1296,10 +1384,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1296 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 1384 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
1297 | blocknr += block; | 1385 | blocknr += block; |
1298 | ext4_grp_locked_error(sb, e4b->bd_group, | 1386 | ext4_grp_locked_error(sb, e4b->bd_group, |
1299 | __func__, "double-free of inode" | 1387 | inode ? inode->i_ino : 0, |
1300 | " %lu's block %llu(bit %u in group %u)", | 1388 | blocknr, |
1301 | inode ? inode->i_ino : 0, blocknr, block, | 1389 | "freeing already freed block " |
1302 | e4b->bd_group); | 1390 | "(bit %u)", block); |
1303 | } | 1391 | } |
1304 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1392 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); |
1305 | e4b->bd_info->bb_counters[order]++; | 1393 | e4b->bd_info->bb_counters[order]++; |
@@ -1788,8 +1876,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1788 | * free blocks even though group info says we | 1876 | * free blocks even though group info says we |
1789 | * we have free blocks | 1877 | * we have free blocks |
1790 | */ | 1878 | */ |
1791 | ext4_grp_locked_error(sb, e4b->bd_group, | 1879 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
1792 | __func__, "%d free blocks as per " | 1880 | "%d free blocks as per " |
1793 | "group info. But bitmap says 0", | 1881 | "group info. But bitmap says 0", |
1794 | free); | 1882 | free); |
1795 | break; | 1883 | break; |
@@ -1798,8 +1886,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1798 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); | 1886 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); |
1799 | BUG_ON(ex.fe_len <= 0); | 1887 | BUG_ON(ex.fe_len <= 0); |
1800 | if (free < ex.fe_len) { | 1888 | if (free < ex.fe_len) { |
1801 | ext4_grp_locked_error(sb, e4b->bd_group, | 1889 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
1802 | __func__, "%d free blocks as per " | 1890 | "%d free blocks as per " |
1803 | "group info. But got %d blocks", | 1891 | "group info. But got %d blocks", |
1804 | free, ex.fe_len); | 1892 | free, ex.fe_len); |
1805 | /* | 1893 | /* |
@@ -1821,8 +1909,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1821 | 1909 | ||
1822 | /* | 1910 | /* |
1823 | * This is a special case for storages like raid5 | 1911 | * This is a special case for storages like raid5 |
1824 | * we try to find stripe-aligned chunks for stripe-size requests | 1912 | * we try to find stripe-aligned chunks for stripe-size-multiple requests |
1825 | * XXX should do so at least for multiples of stripe size as well | ||
1826 | */ | 1913 | */ |
1827 | static noinline_for_stack | 1914 | static noinline_for_stack |
1828 | void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | 1915 | void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, |
@@ -1915,91 +2002,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1915 | return 0; | 2002 | return 0; |
1916 | } | 2003 | } |
1917 | 2004 | ||
1918 | /* | ||
1919 | * lock the group_info alloc_sem of all the groups | ||
1920 | * belonging to the same buddy cache page. This | ||
1921 | * make sure other parallel operation on the buddy | ||
1922 | * cache doesn't happen whild holding the buddy cache | ||
1923 | * lock | ||
1924 | */ | ||
1925 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1926 | { | ||
1927 | int i; | ||
1928 | int block, pnum; | ||
1929 | int blocks_per_page; | ||
1930 | int groups_per_page; | ||
1931 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1932 | ext4_group_t first_group; | ||
1933 | struct ext4_group_info *grp; | ||
1934 | |||
1935 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1936 | /* | ||
1937 | * the buddy cache inode stores the block bitmap | ||
1938 | * and buddy information in consecutive blocks. | ||
1939 | * So for each group we need two blocks. | ||
1940 | */ | ||
1941 | block = group * 2; | ||
1942 | pnum = block / blocks_per_page; | ||
1943 | first_group = pnum * blocks_per_page / 2; | ||
1944 | |||
1945 | groups_per_page = blocks_per_page >> 1; | ||
1946 | if (groups_per_page == 0) | ||
1947 | groups_per_page = 1; | ||
1948 | /* read all groups the page covers into the cache */ | ||
1949 | for (i = 0; i < groups_per_page; i++) { | ||
1950 | |||
1951 | if ((first_group + i) >= ngroups) | ||
1952 | break; | ||
1953 | grp = ext4_get_group_info(sb, first_group + i); | ||
1954 | /* take all groups write allocation | ||
1955 | * semaphore. This make sure there is | ||
1956 | * no block allocation going on in any | ||
1957 | * of that groups | ||
1958 | */ | ||
1959 | down_write_nested(&grp->alloc_sem, i); | ||
1960 | } | ||
1961 | return i; | ||
1962 | } | ||
1963 | |||
1964 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1965 | ext4_group_t group, int locked_group) | ||
1966 | { | ||
1967 | int i; | ||
1968 | int block, pnum; | ||
1969 | int blocks_per_page; | ||
1970 | ext4_group_t first_group; | ||
1971 | struct ext4_group_info *grp; | ||
1972 | |||
1973 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1974 | /* | ||
1975 | * the buddy cache inode stores the block bitmap | ||
1976 | * and buddy information in consecutive blocks. | ||
1977 | * So for each group we need two blocks. | ||
1978 | */ | ||
1979 | block = group * 2; | ||
1980 | pnum = block / blocks_per_page; | ||
1981 | first_group = pnum * blocks_per_page / 2; | ||
1982 | /* release locks on all the groups */ | ||
1983 | for (i = 0; i < locked_group; i++) { | ||
1984 | |||
1985 | grp = ext4_get_group_info(sb, first_group + i); | ||
1986 | /* take all groups write allocation | ||
1987 | * semaphore. This make sure there is | ||
1988 | * no block allocation going on in any | ||
1989 | * of that groups | ||
1990 | */ | ||
1991 | up_write(&grp->alloc_sem); | ||
1992 | } | ||
1993 | |||
1994 | } | ||
1995 | |||
1996 | static noinline_for_stack int | 2005 | static noinline_for_stack int |
1997 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 2006 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1998 | { | 2007 | { |
1999 | ext4_group_t ngroups, group, i; | 2008 | ext4_group_t ngroups, group, i; |
2000 | int cr; | 2009 | int cr; |
2001 | int err = 0; | 2010 | int err = 0; |
2002 | int bsbits; | ||
2003 | struct ext4_sb_info *sbi; | 2011 | struct ext4_sb_info *sbi; |
2004 | struct super_block *sb; | 2012 | struct super_block *sb; |
2005 | struct ext4_buddy e4b; | 2013 | struct ext4_buddy e4b; |
@@ -2041,8 +2049,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
2041 | ac->ac_2order = i - 1; | 2049 | ac->ac_2order = i - 1; |
2042 | } | 2050 | } |
2043 | 2051 | ||
2044 | bsbits = ac->ac_sb->s_blocksize_bits; | ||
2045 | |||
2046 | /* if stream allocation is enabled, use global goal */ | 2052 | /* if stream allocation is enabled, use global goal */ |
2047 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { | 2053 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
2048 | /* TBD: may be hot point */ | 2054 | /* TBD: may be hot point */ |
@@ -2094,8 +2100,8 @@ repeat: | |||
2094 | ac->ac_groups_scanned++; | 2100 | ac->ac_groups_scanned++; |
2095 | if (cr == 0) | 2101 | if (cr == 0) |
2096 | ext4_mb_simple_scan_group(ac, &e4b); | 2102 | ext4_mb_simple_scan_group(ac, &e4b); |
2097 | else if (cr == 1 && | 2103 | else if (cr == 1 && sbi->s_stripe && |
2098 | ac->ac_g_ex.fe_len == sbi->s_stripe) | 2104 | !(ac->ac_g_ex.fe_len % sbi->s_stripe)) |
2099 | ext4_mb_scan_aligned(ac, &e4b); | 2105 | ext4_mb_scan_aligned(ac, &e4b); |
2100 | else | 2106 | else |
2101 | ext4_mb_complex_scan_group(ac, &e4b); | 2107 | ext4_mb_complex_scan_group(ac, &e4b); |
@@ -2221,7 +2227,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | |||
2221 | 2227 | ||
2222 | rc = seq_open(file, &ext4_mb_seq_groups_ops); | 2228 | rc = seq_open(file, &ext4_mb_seq_groups_ops); |
2223 | if (rc == 0) { | 2229 | if (rc == 0) { |
2224 | struct seq_file *m = (struct seq_file *)file->private_data; | 2230 | struct seq_file *m = file->private_data; |
2225 | m->private = sb; | 2231 | m->private = sb; |
2226 | } | 2232 | } |
2227 | return rc; | 2233 | return rc; |
@@ -2236,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = { | |||
2236 | .release = seq_release, | 2242 | .release = seq_release, |
2237 | }; | 2243 | }; |
2238 | 2244 | ||
2245 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | ||
2246 | { | ||
2247 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2248 | struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index]; | ||
2249 | |||
2250 | BUG_ON(!cachep); | ||
2251 | return cachep; | ||
2252 | } | ||
2239 | 2253 | ||
2240 | /* Create and initialize ext4_group_info data for the given group. */ | 2254 | /* Create and initialize ext4_group_info data for the given group. */ |
2241 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2255 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2242 | struct ext4_group_desc *desc) | 2256 | struct ext4_group_desc *desc) |
2243 | { | 2257 | { |
2244 | int i, len; | 2258 | int i; |
2245 | int metalen = 0; | 2259 | int metalen = 0; |
2246 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2260 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2247 | struct ext4_group_info **meta_group_info; | 2261 | struct ext4_group_info **meta_group_info; |
2262 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2248 | 2263 | ||
2249 | /* | 2264 | /* |
2250 | * First check if this group is the first of a reserved block. | 2265 | * First check if this group is the first of a reserved block. |
@@ -2264,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2264 | meta_group_info; | 2279 | meta_group_info; |
2265 | } | 2280 | } |
2266 | 2281 | ||
2267 | /* | ||
2268 | * calculate needed size. if change bb_counters size, | ||
2269 | * don't forget about ext4_mb_generate_buddy() | ||
2270 | */ | ||
2271 | len = offsetof(typeof(**meta_group_info), | ||
2272 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2273 | |||
2274 | meta_group_info = | 2282 | meta_group_info = |
2275 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2283 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2276 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2284 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2277 | 2285 | ||
2278 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | 2286 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2279 | if (meta_group_info[i] == NULL) { | 2287 | if (meta_group_info[i] == NULL) { |
2280 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | 2288 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); |
2281 | goto exit_group_info; | 2289 | goto exit_group_info; |
2282 | } | 2290 | } |
2291 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2283 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2292 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2284 | &(meta_group_info[i]->bb_state)); | 2293 | &(meta_group_info[i]->bb_state)); |
2285 | 2294 | ||
@@ -2334,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2334 | int num_meta_group_infos_max; | 2343 | int num_meta_group_infos_max; |
2335 | int array_size; | 2344 | int array_size; |
2336 | struct ext4_group_desc *desc; | 2345 | struct ext4_group_desc *desc; |
2346 | struct kmem_cache *cachep; | ||
2337 | 2347 | ||
2338 | /* This is the number of blocks used by GDT */ | 2348 | /* This is the number of blocks used by GDT */ |
2339 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2349 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
@@ -2376,6 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2376 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); | 2386 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); |
2377 | goto err_freesgi; | 2387 | goto err_freesgi; |
2378 | } | 2388 | } |
2389 | sbi->s_buddy_cache->i_ino = get_next_ino(); | ||
2379 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2390 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2380 | for (i = 0; i < ngroups; i++) { | 2391 | for (i = 0; i < ngroups; i++) { |
2381 | desc = ext4_get_group_desc(sb, i, NULL); | 2392 | desc = ext4_get_group_desc(sb, i, NULL); |
@@ -2391,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2391 | return 0; | 2402 | return 0; |
2392 | 2403 | ||
2393 | err_freebuddy: | 2404 | err_freebuddy: |
2405 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2394 | while (i-- > 0) | 2406 | while (i-- > 0) |
2395 | kfree(ext4_get_group_info(sb, i)); | 2407 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2396 | i = num_meta_group_infos; | 2408 | i = num_meta_group_infos; |
2397 | while (i-- > 0) | 2409 | while (i-- > 0) |
2398 | kfree(sbi->s_group_info[i]); | 2410 | kfree(sbi->s_group_info[i]); |
@@ -2409,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2409 | unsigned offset; | 2421 | unsigned offset; |
2410 | unsigned max; | 2422 | unsigned max; |
2411 | int ret; | 2423 | int ret; |
2424 | int cache_index; | ||
2425 | struct kmem_cache *cachep; | ||
2426 | char *namep = NULL; | ||
2412 | 2427 | ||
2413 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); | 2428 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2414 | 2429 | ||
2415 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2430 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2416 | if (sbi->s_mb_offsets == NULL) { | 2431 | if (sbi->s_mb_offsets == NULL) { |
2417 | return -ENOMEM; | 2432 | ret = -ENOMEM; |
2433 | goto out; | ||
2418 | } | 2434 | } |
2419 | 2435 | ||
2420 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); | 2436 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2421 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2437 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2422 | if (sbi->s_mb_maxs == NULL) { | 2438 | if (sbi->s_mb_maxs == NULL) { |
2423 | kfree(sbi->s_mb_offsets); | 2439 | ret = -ENOMEM; |
2424 | return -ENOMEM; | 2440 | goto out; |
2441 | } | ||
2442 | |||
2443 | cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2444 | cachep = ext4_groupinfo_caches[cache_index]; | ||
2445 | if (!cachep) { | ||
2446 | char name[32]; | ||
2447 | int len = offsetof(struct ext4_group_info, | ||
2448 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2449 | |||
2450 | sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits); | ||
2451 | namep = kstrdup(name, GFP_KERNEL); | ||
2452 | if (!namep) { | ||
2453 | ret = -ENOMEM; | ||
2454 | goto out; | ||
2455 | } | ||
2456 | |||
2457 | /* Need to free the kmem_cache_name() when we | ||
2458 | * destroy the slab */ | ||
2459 | cachep = kmem_cache_create(namep, len, 0, | ||
2460 | SLAB_RECLAIM_ACCOUNT, NULL); | ||
2461 | if (!cachep) { | ||
2462 | ret = -ENOMEM; | ||
2463 | goto out; | ||
2464 | } | ||
2465 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2425 | } | 2466 | } |
2426 | 2467 | ||
2427 | /* order 0 is regular bitmap */ | 2468 | /* order 0 is regular bitmap */ |
@@ -2442,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2442 | /* init file for buddy data */ | 2483 | /* init file for buddy data */ |
2443 | ret = ext4_mb_init_backend(sb); | 2484 | ret = ext4_mb_init_backend(sb); |
2444 | if (ret != 0) { | 2485 | if (ret != 0) { |
2445 | kfree(sbi->s_mb_offsets); | 2486 | goto out; |
2446 | kfree(sbi->s_mb_maxs); | ||
2447 | return ret; | ||
2448 | } | 2487 | } |
2449 | 2488 | ||
2450 | spin_lock_init(&sbi->s_md_lock); | 2489 | spin_lock_init(&sbi->s_md_lock); |
@@ -2459,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2459 | 2498 | ||
2460 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2499 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2461 | if (sbi->s_locality_groups == NULL) { | 2500 | if (sbi->s_locality_groups == NULL) { |
2462 | kfree(sbi->s_mb_offsets); | 2501 | ret = -ENOMEM; |
2463 | kfree(sbi->s_mb_maxs); | 2502 | goto out; |
2464 | return -ENOMEM; | ||
2465 | } | 2503 | } |
2466 | for_each_possible_cpu(i) { | 2504 | for_each_possible_cpu(i) { |
2467 | struct ext4_locality_group *lg; | 2505 | struct ext4_locality_group *lg; |
@@ -2478,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2478 | 2516 | ||
2479 | if (sbi->s_journal) | 2517 | if (sbi->s_journal) |
2480 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2518 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2481 | return 0; | 2519 | out: |
2520 | if (ret) { | ||
2521 | kfree(sbi->s_mb_offsets); | ||
2522 | kfree(sbi->s_mb_maxs); | ||
2523 | kfree(namep); | ||
2524 | } | ||
2525 | return ret; | ||
2482 | } | 2526 | } |
2483 | 2527 | ||
2484 | /* need to called with the ext4 group lock held */ | 2528 | /* need to called with the ext4 group lock held */ |
@@ -2506,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2506 | int num_meta_group_infos; | 2550 | int num_meta_group_infos; |
2507 | struct ext4_group_info *grinfo; | 2551 | struct ext4_group_info *grinfo; |
2508 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2552 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2553 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2509 | 2554 | ||
2510 | if (sbi->s_group_info) { | 2555 | if (sbi->s_group_info) { |
2511 | for (i = 0; i < ngroups; i++) { | 2556 | for (i = 0; i < ngroups; i++) { |
@@ -2516,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2516 | ext4_lock_group(sb, i); | 2561 | ext4_lock_group(sb, i); |
2517 | ext4_mb_cleanup_pa(grinfo); | 2562 | ext4_mb_cleanup_pa(grinfo); |
2518 | ext4_unlock_group(sb, i); | 2563 | ext4_unlock_group(sb, i); |
2519 | kfree(grinfo); | 2564 | kmem_cache_free(cachep, grinfo); |
2520 | } | 2565 | } |
2521 | num_meta_group_infos = (ngroups + | 2566 | num_meta_group_infos = (ngroups + |
2522 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2567 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
@@ -2560,6 +2605,23 @@ int ext4_mb_release(struct super_block *sb) | |||
2560 | return 0; | 2605 | return 0; |
2561 | } | 2606 | } |
2562 | 2607 | ||
2608 | static inline int ext4_issue_discard(struct super_block *sb, | ||
2609 | ext4_group_t block_group, ext4_grpblk_t block, int count) | ||
2610 | { | ||
2611 | int ret; | ||
2612 | ext4_fsblk_t discard_block; | ||
2613 | |||
2614 | discard_block = block + ext4_group_first_block_no(sb, block_group); | ||
2615 | trace_ext4_discard_blocks(sb, | ||
2616 | (unsigned long long) discard_block, count); | ||
2617 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); | ||
2618 | if (ret == -EOPNOTSUPP) { | ||
2619 | ext4_warning(sb, "discard not supported, disabling"); | ||
2620 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2621 | } | ||
2622 | return ret; | ||
2623 | } | ||
2624 | |||
2563 | /* | 2625 | /* |
2564 | * This function is called by the jbd2 layer once the commit has finished, | 2626 | * This function is called by the jbd2 layer once the commit has finished, |
2565 | * so we know we can free the blocks that were released with that commit. | 2627 | * so we know we can free the blocks that were released with that commit. |
@@ -2579,22 +2641,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2579 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2641 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2580 | entry->count, entry->group, entry); | 2642 | entry->count, entry->group, entry); |
2581 | 2643 | ||
2582 | if (test_opt(sb, DISCARD)) { | 2644 | if (test_opt(sb, DISCARD)) |
2583 | int ret; | 2645 | ext4_issue_discard(sb, entry->group, |
2584 | ext4_fsblk_t discard_block; | 2646 | entry->start_blk, entry->count); |
2585 | |||
2586 | discard_block = entry->start_blk + | ||
2587 | ext4_group_first_block_no(sb, entry->group); | ||
2588 | trace_ext4_discard_blocks(sb, | ||
2589 | (unsigned long long)discard_block, | ||
2590 | entry->count); | ||
2591 | ret = sb_issue_discard(sb, discard_block, entry->count); | ||
2592 | if (ret == EOPNOTSUPP) { | ||
2593 | ext4_warning(sb, | ||
2594 | "discard not supported, disabling"); | ||
2595 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2596 | } | ||
2597 | } | ||
2598 | 2647 | ||
2599 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2648 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2600 | /* we expect to find existing buddy because it's pinned */ | 2649 | /* we expect to find existing buddy because it's pinned */ |
@@ -2658,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void) | |||
2658 | 2707 | ||
2659 | #endif | 2708 | #endif |
2660 | 2709 | ||
2661 | int __init init_ext4_mballoc(void) | 2710 | int __init ext4_init_mballoc(void) |
2662 | { | 2711 | { |
2663 | ext4_pspace_cachep = | 2712 | ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, |
2664 | kmem_cache_create("ext4_prealloc_space", | 2713 | SLAB_RECLAIM_ACCOUNT); |
2665 | sizeof(struct ext4_prealloc_space), | ||
2666 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2667 | if (ext4_pspace_cachep == NULL) | 2714 | if (ext4_pspace_cachep == NULL) |
2668 | return -ENOMEM; | 2715 | return -ENOMEM; |
2669 | 2716 | ||
2670 | ext4_ac_cachep = | 2717 | ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, |
2671 | kmem_cache_create("ext4_alloc_context", | 2718 | SLAB_RECLAIM_ACCOUNT); |
2672 | sizeof(struct ext4_allocation_context), | ||
2673 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2674 | if (ext4_ac_cachep == NULL) { | 2719 | if (ext4_ac_cachep == NULL) { |
2675 | kmem_cache_destroy(ext4_pspace_cachep); | 2720 | kmem_cache_destroy(ext4_pspace_cachep); |
2676 | return -ENOMEM; | 2721 | return -ENOMEM; |
2677 | } | 2722 | } |
2678 | 2723 | ||
2679 | ext4_free_ext_cachep = | 2724 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, |
2680 | kmem_cache_create("ext4_free_block_extents", | 2725 | SLAB_RECLAIM_ACCOUNT); |
2681 | sizeof(struct ext4_free_data), | ||
2682 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2683 | if (ext4_free_ext_cachep == NULL) { | 2726 | if (ext4_free_ext_cachep == NULL) { |
2684 | kmem_cache_destroy(ext4_pspace_cachep); | 2727 | kmem_cache_destroy(ext4_pspace_cachep); |
2685 | kmem_cache_destroy(ext4_ac_cachep); | 2728 | kmem_cache_destroy(ext4_ac_cachep); |
@@ -2689,8 +2732,9 @@ int __init init_ext4_mballoc(void) | |||
2689 | return 0; | 2732 | return 0; |
2690 | } | 2733 | } |
2691 | 2734 | ||
2692 | void exit_ext4_mballoc(void) | 2735 | void ext4_exit_mballoc(void) |
2693 | { | 2736 | { |
2737 | int i; | ||
2694 | /* | 2738 | /* |
2695 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2739 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
2696 | * before destroying the slab cache. | 2740 | * before destroying the slab cache. |
@@ -2699,12 +2743,21 @@ void exit_ext4_mballoc(void) | |||
2699 | kmem_cache_destroy(ext4_pspace_cachep); | 2743 | kmem_cache_destroy(ext4_pspace_cachep); |
2700 | kmem_cache_destroy(ext4_ac_cachep); | 2744 | kmem_cache_destroy(ext4_ac_cachep); |
2701 | kmem_cache_destroy(ext4_free_ext_cachep); | 2745 | kmem_cache_destroy(ext4_free_ext_cachep); |
2746 | |||
2747 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2748 | struct kmem_cache *cachep = ext4_groupinfo_caches[i]; | ||
2749 | if (cachep) { | ||
2750 | char *name = (char *)kmem_cache_name(cachep); | ||
2751 | kmem_cache_destroy(cachep); | ||
2752 | kfree(name); | ||
2753 | } | ||
2754 | } | ||
2702 | ext4_remove_debugfs_entry(); | 2755 | ext4_remove_debugfs_entry(); |
2703 | } | 2756 | } |
2704 | 2757 | ||
2705 | 2758 | ||
2706 | /* | 2759 | /* |
2707 | * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps | 2760 | * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps |
2708 | * Returns 0 if success or error code | 2761 | * Returns 0 if success or error code |
2709 | */ | 2762 | */ |
2710 | static noinline_for_stack int | 2763 | static noinline_for_stack int |
@@ -2712,7 +2765,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2712 | handle_t *handle, unsigned int reserv_blks) | 2765 | handle_t *handle, unsigned int reserv_blks) |
2713 | { | 2766 | { |
2714 | struct buffer_head *bitmap_bh = NULL; | 2767 | struct buffer_head *bitmap_bh = NULL; |
2715 | struct ext4_super_block *es; | ||
2716 | struct ext4_group_desc *gdp; | 2768 | struct ext4_group_desc *gdp; |
2717 | struct buffer_head *gdp_bh; | 2769 | struct buffer_head *gdp_bh; |
2718 | struct ext4_sb_info *sbi; | 2770 | struct ext4_sb_info *sbi; |
@@ -2725,8 +2777,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2725 | 2777 | ||
2726 | sb = ac->ac_sb; | 2778 | sb = ac->ac_sb; |
2727 | sbi = EXT4_SB(sb); | 2779 | sbi = EXT4_SB(sb); |
2728 | es = sbi->s_es; | ||
2729 | |||
2730 | 2780 | ||
2731 | err = -EIO; | 2781 | err = -EIO; |
2732 | bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); | 2782 | bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); |
@@ -2812,7 +2862,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2812 | err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); | 2862 | err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); |
2813 | 2863 | ||
2814 | out_err: | 2864 | out_err: |
2815 | sb->s_dirt = 1; | 2865 | ext4_mark_super_dirty(sb); |
2816 | brelse(bitmap_bh); | 2866 | brelse(bitmap_bh); |
2817 | return err; | 2867 | return err; |
2818 | } | 2868 | } |
@@ -2850,7 +2900,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2850 | int bsbits, max; | 2900 | int bsbits, max; |
2851 | ext4_lblk_t end; | 2901 | ext4_lblk_t end; |
2852 | loff_t size, orig_size, start_off; | 2902 | loff_t size, orig_size, start_off; |
2853 | ext4_lblk_t start, orig_start; | 2903 | ext4_lblk_t start; |
2854 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 2904 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
2855 | struct ext4_prealloc_space *pa; | 2905 | struct ext4_prealloc_space *pa; |
2856 | 2906 | ||
@@ -2881,6 +2931,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2881 | size = size << bsbits; | 2931 | size = size << bsbits; |
2882 | if (size < i_size_read(ac->ac_inode)) | 2932 | if (size < i_size_read(ac->ac_inode)) |
2883 | size = i_size_read(ac->ac_inode); | 2933 | size = i_size_read(ac->ac_inode); |
2934 | orig_size = size; | ||
2884 | 2935 | ||
2885 | /* max size of free chunks */ | 2936 | /* max size of free chunks */ |
2886 | max = 2 << bsbits; | 2937 | max = 2 << bsbits; |
@@ -2922,8 +2973,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2922 | start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; | 2973 | start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; |
2923 | size = ac->ac_o_ex.fe_len << bsbits; | 2974 | size = ac->ac_o_ex.fe_len << bsbits; |
2924 | } | 2975 | } |
2925 | orig_size = size = size >> bsbits; | 2976 | size = size >> bsbits; |
2926 | orig_start = start = start_off >> bsbits; | 2977 | start = start_off >> bsbits; |
2927 | 2978 | ||
2928 | /* don't cover already allocated blocks in selected range */ | 2979 | /* don't cover already allocated blocks in selected range */ |
2929 | if (ar->pleft && start <= ar->lleft) { | 2980 | if (ar->pleft && start <= ar->lleft) { |
@@ -3537,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) | |||
3537 | */ | 3588 | */ |
3538 | static noinline_for_stack int | 3589 | static noinline_for_stack int |
3539 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | 3590 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, |
3540 | struct ext4_prealloc_space *pa, | 3591 | struct ext4_prealloc_space *pa) |
3541 | struct ext4_allocation_context *ac) | ||
3542 | { | 3592 | { |
3543 | struct super_block *sb = e4b->bd_sb; | 3593 | struct super_block *sb = e4b->bd_sb; |
3544 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3594 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -3547,7 +3597,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3547 | ext4_group_t group; | 3597 | ext4_group_t group; |
3548 | ext4_grpblk_t bit; | 3598 | ext4_grpblk_t bit; |
3549 | unsigned long long grp_blk_start; | 3599 | unsigned long long grp_blk_start; |
3550 | sector_t start; | ||
3551 | int err = 0; | 3600 | int err = 0; |
3552 | int free = 0; | 3601 | int free = 0; |
3553 | 3602 | ||
@@ -3557,32 +3606,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3557 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3606 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3558 | end = bit + pa->pa_len; | 3607 | end = bit + pa->pa_len; |
3559 | 3608 | ||
3560 | if (ac) { | ||
3561 | ac->ac_sb = sb; | ||
3562 | ac->ac_inode = pa->pa_inode; | ||
3563 | } | ||
3564 | |||
3565 | while (bit < end) { | 3609 | while (bit < end) { |
3566 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); | 3610 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); |
3567 | if (bit >= end) | 3611 | if (bit >= end) |
3568 | break; | 3612 | break; |
3569 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3613 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
3570 | start = ext4_group_first_block_no(sb, group) + bit; | ||
3571 | mb_debug(1, " free preallocated %u/%u in group %u\n", | 3614 | mb_debug(1, " free preallocated %u/%u in group %u\n", |
3572 | (unsigned) start, (unsigned) next - bit, | 3615 | (unsigned) ext4_group_first_block_no(sb, group) + bit, |
3573 | (unsigned) group); | 3616 | (unsigned) next - bit, (unsigned) group); |
3574 | free += next - bit; | 3617 | free += next - bit; |
3575 | 3618 | ||
3576 | if (ac) { | 3619 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3577 | ac->ac_b_ex.fe_group = group; | 3620 | trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, |
3578 | ac->ac_b_ex.fe_start = bit; | 3621 | grp_blk_start + bit, next - bit); |
3579 | ac->ac_b_ex.fe_len = next - bit; | ||
3580 | ac->ac_b_ex.fe_logical = 0; | ||
3581 | trace_ext4_mballoc_discard(ac); | ||
3582 | } | ||
3583 | |||
3584 | trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, | ||
3585 | next - bit); | ||
3586 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3622 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3587 | bit = next + 1; | 3623 | bit = next + 1; |
3588 | } | 3624 | } |
@@ -3591,8 +3627,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3591 | pa, (unsigned long) pa->pa_lstart, | 3627 | pa, (unsigned long) pa->pa_lstart, |
3592 | (unsigned long) pa->pa_pstart, | 3628 | (unsigned long) pa->pa_pstart, |
3593 | (unsigned long) pa->pa_len); | 3629 | (unsigned long) pa->pa_len); |
3594 | ext4_grp_locked_error(sb, group, | 3630 | ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", |
3595 | __func__, "free %u, pa_free %u", | ||
3596 | free, pa->pa_free); | 3631 | free, pa->pa_free); |
3597 | /* | 3632 | /* |
3598 | * pa is already deleted so we use the value obtained | 3633 | * pa is already deleted so we use the value obtained |
@@ -3606,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3606 | 3641 | ||
3607 | static noinline_for_stack int | 3642 | static noinline_for_stack int |
3608 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, | 3643 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, |
3609 | struct ext4_prealloc_space *pa, | 3644 | struct ext4_prealloc_space *pa) |
3610 | struct ext4_allocation_context *ac) | ||
3611 | { | 3645 | { |
3612 | struct super_block *sb = e4b->bd_sb; | 3646 | struct super_block *sb = e4b->bd_sb; |
3613 | ext4_group_t group; | 3647 | ext4_group_t group; |
3614 | ext4_grpblk_t bit; | 3648 | ext4_grpblk_t bit; |
3615 | 3649 | ||
3616 | trace_ext4_mb_release_group_pa(ac, pa); | 3650 | trace_ext4_mb_release_group_pa(sb, pa); |
3617 | BUG_ON(pa->pa_deleted == 0); | 3651 | BUG_ON(pa->pa_deleted == 0); |
3618 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3652 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3619 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3653 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3620 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); | 3654 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
3621 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); | 3655 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
3622 | 3656 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); | |
3623 | if (ac) { | ||
3624 | ac->ac_sb = sb; | ||
3625 | ac->ac_inode = NULL; | ||
3626 | ac->ac_b_ex.fe_group = group; | ||
3627 | ac->ac_b_ex.fe_start = bit; | ||
3628 | ac->ac_b_ex.fe_len = pa->pa_len; | ||
3629 | ac->ac_b_ex.fe_logical = 0; | ||
3630 | trace_ext4_mballoc_discard(ac); | ||
3631 | } | ||
3632 | 3657 | ||
3633 | return 0; | 3658 | return 0; |
3634 | } | 3659 | } |
@@ -3649,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3649 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 3674 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
3650 | struct buffer_head *bitmap_bh = NULL; | 3675 | struct buffer_head *bitmap_bh = NULL; |
3651 | struct ext4_prealloc_space *pa, *tmp; | 3676 | struct ext4_prealloc_space *pa, *tmp; |
3652 | struct ext4_allocation_context *ac; | ||
3653 | struct list_head list; | 3677 | struct list_head list; |
3654 | struct ext4_buddy e4b; | 3678 | struct ext4_buddy e4b; |
3655 | int err; | 3679 | int err; |
@@ -3678,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3678 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3702 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; |
3679 | 3703 | ||
3680 | INIT_LIST_HEAD(&list); | 3704 | INIT_LIST_HEAD(&list); |
3681 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3682 | if (ac) | ||
3683 | ac->ac_sb = sb; | ||
3684 | repeat: | 3705 | repeat: |
3685 | ext4_lock_group(sb, group); | 3706 | ext4_lock_group(sb, group); |
3686 | list_for_each_entry_safe(pa, tmp, | 3707 | list_for_each_entry_safe(pa, tmp, |
@@ -3735,9 +3756,9 @@ repeat: | |||
3735 | spin_unlock(pa->pa_obj_lock); | 3756 | spin_unlock(pa->pa_obj_lock); |
3736 | 3757 | ||
3737 | if (pa->pa_type == MB_GROUP_PA) | 3758 | if (pa->pa_type == MB_GROUP_PA) |
3738 | ext4_mb_release_group_pa(&e4b, pa, ac); | 3759 | ext4_mb_release_group_pa(&e4b, pa); |
3739 | else | 3760 | else |
3740 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3761 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3741 | 3762 | ||
3742 | list_del(&pa->u.pa_tmp_list); | 3763 | list_del(&pa->u.pa_tmp_list); |
3743 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3764 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
@@ -3745,8 +3766,6 @@ repeat: | |||
3745 | 3766 | ||
3746 | out: | 3767 | out: |
3747 | ext4_unlock_group(sb, group); | 3768 | ext4_unlock_group(sb, group); |
3748 | if (ac) | ||
3749 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3750 | ext4_mb_unload_buddy(&e4b); | 3769 | ext4_mb_unload_buddy(&e4b); |
3751 | put_bh(bitmap_bh); | 3770 | put_bh(bitmap_bh); |
3752 | return free; | 3771 | return free; |
@@ -3767,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3767 | struct super_block *sb = inode->i_sb; | 3786 | struct super_block *sb = inode->i_sb; |
3768 | struct buffer_head *bitmap_bh = NULL; | 3787 | struct buffer_head *bitmap_bh = NULL; |
3769 | struct ext4_prealloc_space *pa, *tmp; | 3788 | struct ext4_prealloc_space *pa, *tmp; |
3770 | struct ext4_allocation_context *ac; | ||
3771 | ext4_group_t group = 0; | 3789 | ext4_group_t group = 0; |
3772 | struct list_head list; | 3790 | struct list_head list; |
3773 | struct ext4_buddy e4b; | 3791 | struct ext4_buddy e4b; |
@@ -3783,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3783 | 3801 | ||
3784 | INIT_LIST_HEAD(&list); | 3802 | INIT_LIST_HEAD(&list); |
3785 | 3803 | ||
3786 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3787 | if (ac) { | ||
3788 | ac->ac_sb = sb; | ||
3789 | ac->ac_inode = inode; | ||
3790 | } | ||
3791 | repeat: | 3804 | repeat: |
3792 | /* first, collect all pa's in the inode */ | 3805 | /* first, collect all pa's in the inode */ |
3793 | spin_lock(&ei->i_prealloc_lock); | 3806 | spin_lock(&ei->i_prealloc_lock); |
@@ -3857,7 +3870,7 @@ repeat: | |||
3857 | 3870 | ||
3858 | ext4_lock_group(sb, group); | 3871 | ext4_lock_group(sb, group); |
3859 | list_del(&pa->pa_group_list); | 3872 | list_del(&pa->pa_group_list); |
3860 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3873 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3861 | ext4_unlock_group(sb, group); | 3874 | ext4_unlock_group(sb, group); |
3862 | 3875 | ||
3863 | ext4_mb_unload_buddy(&e4b); | 3876 | ext4_mb_unload_buddy(&e4b); |
@@ -3866,8 +3879,6 @@ repeat: | |||
3866 | list_del(&pa->u.pa_tmp_list); | 3879 | list_del(&pa->u.pa_tmp_list); |
3867 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3880 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
3868 | } | 3881 | } |
3869 | if (ac) | ||
3870 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3871 | } | 3882 | } |
3872 | 3883 | ||
3873 | /* | 3884 | /* |
@@ -3889,6 +3900,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3889 | struct super_block *sb = ac->ac_sb; | 3900 | struct super_block *sb = ac->ac_sb; |
3890 | ext4_group_t ngroups, i; | 3901 | ext4_group_t ngroups, i; |
3891 | 3902 | ||
3903 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | ||
3904 | return; | ||
3905 | |||
3892 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3906 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
3893 | " Allocation context details:\n"); | 3907 | " Allocation context details:\n"); |
3894 | printk(KERN_ERR "EXT4-fs: status %d flags %d\n", | 3908 | printk(KERN_ERR "EXT4-fs: status %d flags %d\n", |
@@ -4062,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4062 | struct ext4_buddy e4b; | 4076 | struct ext4_buddy e4b; |
4063 | struct list_head discard_list; | 4077 | struct list_head discard_list; |
4064 | struct ext4_prealloc_space *pa, *tmp; | 4078 | struct ext4_prealloc_space *pa, *tmp; |
4065 | struct ext4_allocation_context *ac; | ||
4066 | 4079 | ||
4067 | mb_debug(1, "discard locality group preallocation\n"); | 4080 | mb_debug(1, "discard locality group preallocation\n"); |
4068 | 4081 | ||
4069 | INIT_LIST_HEAD(&discard_list); | 4082 | INIT_LIST_HEAD(&discard_list); |
4070 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4071 | if (ac) | ||
4072 | ac->ac_sb = sb; | ||
4073 | 4083 | ||
4074 | spin_lock(&lg->lg_prealloc_lock); | 4084 | spin_lock(&lg->lg_prealloc_lock); |
4075 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], | 4085 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
@@ -4121,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4121 | } | 4131 | } |
4122 | ext4_lock_group(sb, group); | 4132 | ext4_lock_group(sb, group); |
4123 | list_del(&pa->pa_group_list); | 4133 | list_del(&pa->pa_group_list); |
4124 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4134 | ext4_mb_release_group_pa(&e4b, pa); |
4125 | ext4_unlock_group(sb, group); | 4135 | ext4_unlock_group(sb, group); |
4126 | 4136 | ||
4127 | ext4_mb_unload_buddy(&e4b); | 4137 | ext4_mb_unload_buddy(&e4b); |
4128 | list_del(&pa->u.pa_tmp_list); | 4138 | list_del(&pa->u.pa_tmp_list); |
4129 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4139 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4130 | } | 4140 | } |
4131 | if (ac) | ||
4132 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4133 | } | 4141 | } |
4134 | 4142 | ||
4135 | /* | 4143 | /* |
@@ -4255,7 +4263,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4255 | * to usual allocation | 4263 | * to usual allocation |
4256 | */ | 4264 | */ |
4257 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4265 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
4258 | struct ext4_allocation_request *ar, int *errp) | 4266 | struct ext4_allocation_request *ar, int *errp) |
4259 | { | 4267 | { |
4260 | int freed; | 4268 | int freed; |
4261 | struct ext4_allocation_context *ac = NULL; | 4269 | struct ext4_allocation_context *ac = NULL; |
@@ -4299,7 +4307,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4299 | inquota = ar->len; | 4307 | inquota = ar->len; |
4300 | if (ar->len == 0) { | 4308 | if (ar->len == 0) { |
4301 | *errp = -EDQUOT; | 4309 | *errp = -EDQUOT; |
4302 | goto out3; | 4310 | goto out; |
4303 | } | 4311 | } |
4304 | } | 4312 | } |
4305 | 4313 | ||
@@ -4307,13 +4315,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4307 | if (!ac) { | 4315 | if (!ac) { |
4308 | ar->len = 0; | 4316 | ar->len = 0; |
4309 | *errp = -ENOMEM; | 4317 | *errp = -ENOMEM; |
4310 | goto out1; | 4318 | goto out; |
4311 | } | 4319 | } |
4312 | 4320 | ||
4313 | *errp = ext4_mb_initialize_context(ac, ar); | 4321 | *errp = ext4_mb_initialize_context(ac, ar); |
4314 | if (*errp) { | 4322 | if (*errp) { |
4315 | ar->len = 0; | 4323 | ar->len = 0; |
4316 | goto out2; | 4324 | goto out; |
4317 | } | 4325 | } |
4318 | 4326 | ||
4319 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; | 4327 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; |
@@ -4322,7 +4330,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4322 | ext4_mb_normalize_request(ac, ar); | 4330 | ext4_mb_normalize_request(ac, ar); |
4323 | repeat: | 4331 | repeat: |
4324 | /* allocate space in core */ | 4332 | /* allocate space in core */ |
4325 | ext4_mb_regular_allocator(ac); | 4333 | *errp = ext4_mb_regular_allocator(ac); |
4334 | if (*errp) | ||
4335 | goto errout; | ||
4326 | 4336 | ||
4327 | /* as we've just preallocated more space than | 4337 | /* as we've just preallocated more space than |
4328 | * user requested orinally, we store allocated | 4338 | * user requested orinally, we store allocated |
@@ -4333,7 +4343,7 @@ repeat: | |||
4333 | } | 4343 | } |
4334 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4344 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4335 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); | 4345 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4336 | if (*errp == -EAGAIN) { | 4346 | if (*errp == -EAGAIN) { |
4337 | /* | 4347 | /* |
4338 | * drop the reference that we took | 4348 | * drop the reference that we took |
4339 | * in ext4_mb_use_best_found | 4349 | * in ext4_mb_use_best_found |
@@ -4344,12 +4354,10 @@ repeat: | |||
4344 | ac->ac_b_ex.fe_len = 0; | 4354 | ac->ac_b_ex.fe_len = 0; |
4345 | ac->ac_status = AC_STATUS_CONTINUE; | 4355 | ac->ac_status = AC_STATUS_CONTINUE; |
4346 | goto repeat; | 4356 | goto repeat; |
4347 | } else if (*errp) { | 4357 | } else if (*errp) |
4358 | errout: | ||
4348 | ext4_discard_allocated_blocks(ac); | 4359 | ext4_discard_allocated_blocks(ac); |
4349 | ac->ac_b_ex.fe_len = 0; | 4360 | else { |
4350 | ar->len = 0; | ||
4351 | ext4_mb_show_ac(ac); | ||
4352 | } else { | ||
4353 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); | 4361 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); |
4354 | ar->len = ac->ac_b_ex.fe_len; | 4362 | ar->len = ac->ac_b_ex.fe_len; |
4355 | } | 4363 | } |
@@ -4358,19 +4366,19 @@ repeat: | |||
4358 | if (freed) | 4366 | if (freed) |
4359 | goto repeat; | 4367 | goto repeat; |
4360 | *errp = -ENOSPC; | 4368 | *errp = -ENOSPC; |
4369 | } | ||
4370 | |||
4371 | if (*errp) { | ||
4361 | ac->ac_b_ex.fe_len = 0; | 4372 | ac->ac_b_ex.fe_len = 0; |
4362 | ar->len = 0; | 4373 | ar->len = 0; |
4363 | ext4_mb_show_ac(ac); | 4374 | ext4_mb_show_ac(ac); |
4364 | } | 4375 | } |
4365 | |||
4366 | ext4_mb_release_context(ac); | 4376 | ext4_mb_release_context(ac); |
4367 | 4377 | out: | |
4368 | out2: | 4378 | if (ac) |
4369 | kmem_cache_free(ext4_ac_cachep, ac); | 4379 | kmem_cache_free(ext4_ac_cachep, ac); |
4370 | out1: | ||
4371 | if (inquota && ar->len < inquota) | 4380 | if (inquota && ar->len < inquota) |
4372 | dquot_free_block(ar->inode, inquota - ar->len); | 4381 | dquot_free_block(ar->inode, inquota - ar->len); |
4373 | out3: | ||
4374 | if (!ar->len) { | 4382 | if (!ar->len) { |
4375 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) | 4383 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) |
4376 | /* release all the reserved blocks if non delalloc */ | 4384 | /* release all the reserved blocks if non delalloc */ |
@@ -4402,6 +4410,7 @@ static noinline_for_stack int | |||
4402 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | 4410 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, |
4403 | struct ext4_free_data *new_entry) | 4411 | struct ext4_free_data *new_entry) |
4404 | { | 4412 | { |
4413 | ext4_group_t group = e4b->bd_group; | ||
4405 | ext4_grpblk_t block; | 4414 | ext4_grpblk_t block; |
4406 | struct ext4_free_data *entry; | 4415 | struct ext4_free_data *entry; |
4407 | struct ext4_group_info *db = e4b->bd_info; | 4416 | struct ext4_group_info *db = e4b->bd_info; |
@@ -4434,9 +4443,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4434 | else if (block >= (entry->start_blk + entry->count)) | 4443 | else if (block >= (entry->start_blk + entry->count)) |
4435 | n = &(*n)->rb_right; | 4444 | n = &(*n)->rb_right; |
4436 | else { | 4445 | else { |
4437 | ext4_grp_locked_error(sb, e4b->bd_group, __func__, | 4446 | ext4_grp_locked_error(sb, group, 0, |
4438 | "Double free of blocks %d (%d %d)", | 4447 | ext4_group_first_block_no(sb, group) + block, |
4439 | block, entry->start_blk, entry->count); | 4448 | "Block already on to-be-freed list"); |
4440 | return 0; | 4449 | return 0; |
4441 | } | 4450 | } |
4442 | } | 4451 | } |
@@ -4492,9 +4501,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4492 | { | 4501 | { |
4493 | struct buffer_head *bitmap_bh = NULL; | 4502 | struct buffer_head *bitmap_bh = NULL; |
4494 | struct super_block *sb = inode->i_sb; | 4503 | struct super_block *sb = inode->i_sb; |
4495 | struct ext4_allocation_context *ac = NULL; | ||
4496 | struct ext4_group_desc *gdp; | 4504 | struct ext4_group_desc *gdp; |
4497 | struct ext4_super_block *es; | ||
4498 | unsigned long freed = 0; | 4505 | unsigned long freed = 0; |
4499 | unsigned int overflow; | 4506 | unsigned int overflow; |
4500 | ext4_grpblk_t bit; | 4507 | ext4_grpblk_t bit; |
@@ -4513,7 +4520,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4513 | } | 4520 | } |
4514 | 4521 | ||
4515 | sbi = EXT4_SB(sb); | 4522 | sbi = EXT4_SB(sb); |
4516 | es = EXT4_SB(sb)->s_es; | ||
4517 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && | 4523 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
4518 | !ext4_data_block_valid(sbi, block, count)) { | 4524 | !ext4_data_block_valid(sbi, block, count)) { |
4519 | ext4_error(sb, "Freeing blocks not in datazone - " | 4525 | ext4_error(sb, "Freeing blocks not in datazone - " |
@@ -4534,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4534 | if (!bh) | 4540 | if (!bh) |
4535 | tbh = sb_find_get_block(inode->i_sb, | 4541 | tbh = sb_find_get_block(inode->i_sb, |
4536 | block + i); | 4542 | block + i); |
4543 | if (unlikely(!tbh)) | ||
4544 | continue; | ||
4537 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4545 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4538 | inode, tbh, block + i); | 4546 | inode, tbh, block + i); |
4539 | } | 4547 | } |
@@ -4549,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4549 | if (!ext4_should_writeback_data(inode)) | 4557 | if (!ext4_should_writeback_data(inode)) |
4550 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4558 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4551 | 4559 | ||
4552 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4553 | if (ac) { | ||
4554 | ac->ac_inode = inode; | ||
4555 | ac->ac_sb = sb; | ||
4556 | } | ||
4557 | |||
4558 | do_more: | 4560 | do_more: |
4559 | overflow = 0; | 4561 | overflow = 0; |
4560 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4562 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -4612,12 +4614,7 @@ do_more: | |||
4612 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4614 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4613 | } | 4615 | } |
4614 | #endif | 4616 | #endif |
4615 | if (ac) { | 4617 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); |
4616 | ac->ac_b_ex.fe_group = block_group; | ||
4617 | ac->ac_b_ex.fe_start = bit; | ||
4618 | ac->ac_b_ex.fe_len = count; | ||
4619 | trace_ext4_mballoc_free(ac); | ||
4620 | } | ||
4621 | 4618 | ||
4622 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4623 | if (err) | 4620 | if (err) |
@@ -4643,6 +4640,8 @@ do_more: | |||
4643 | * with group lock held. generate_buddy look at | 4640 | * with group lock held. generate_buddy look at |
4644 | * them with group lock_held | 4641 | * them with group lock_held |
4645 | */ | 4642 | */ |
4643 | if (test_opt(sb, DISCARD)) | ||
4644 | ext4_issue_discard(sb, block_group, bit, count); | ||
4646 | ext4_lock_group(sb, block_group); | 4645 | ext4_lock_group(sb, block_group); |
4647 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4646 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4648 | mb_free_blocks(inode, &e4b, bit, count); | 4647 | mb_free_blocks(inode, &e4b, bit, count); |
@@ -4680,13 +4679,196 @@ do_more: | |||
4680 | put_bh(bitmap_bh); | 4679 | put_bh(bitmap_bh); |
4681 | goto do_more; | 4680 | goto do_more; |
4682 | } | 4681 | } |
4683 | sb->s_dirt = 1; | 4682 | ext4_mark_super_dirty(sb); |
4684 | error_return: | 4683 | error_return: |
4685 | if (freed) | 4684 | if (freed) |
4686 | dquot_free_block(inode, freed); | 4685 | dquot_free_block(inode, freed); |
4687 | brelse(bitmap_bh); | 4686 | brelse(bitmap_bh); |
4688 | ext4_std_error(sb, err); | 4687 | ext4_std_error(sb, err); |
4689 | if (ac) | ||
4690 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4691 | return; | 4688 | return; |
4692 | } | 4689 | } |
4690 | |||
4691 | /** | ||
4692 | * ext4_trim_extent -- function to TRIM one single free extent in the group | ||
4693 | * @sb: super block for the file system | ||
4694 | * @start: starting block of the free extent in the alloc. group | ||
4695 | * @count: number of blocks to TRIM | ||
4696 | * @group: alloc. group we are working with | ||
4697 | * @e4b: ext4 buddy for the group | ||
4698 | * | ||
4699 | * Trim "count" blocks starting at "start" in the "group". To assure that no | ||
4700 | * one will allocate those blocks, mark it as used in buddy bitmap. This must | ||
4701 | * be called with under the group lock. | ||
4702 | */ | ||
4703 | static int ext4_trim_extent(struct super_block *sb, int start, int count, | ||
4704 | ext4_group_t group, struct ext4_buddy *e4b) | ||
4705 | { | ||
4706 | struct ext4_free_extent ex; | ||
4707 | int ret = 0; | ||
4708 | |||
4709 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | ||
4710 | |||
4711 | ex.fe_start = start; | ||
4712 | ex.fe_group = group; | ||
4713 | ex.fe_len = count; | ||
4714 | |||
4715 | /* | ||
4716 | * Mark blocks used, so no one can reuse them while | ||
4717 | * being trimmed. | ||
4718 | */ | ||
4719 | mb_mark_used(e4b, &ex); | ||
4720 | ext4_unlock_group(sb, group); | ||
4721 | |||
4722 | ret = ext4_issue_discard(sb, group, start, count); | ||
4723 | if (ret) | ||
4724 | ext4_std_error(sb, ret); | ||
4725 | |||
4726 | ext4_lock_group(sb, group); | ||
4727 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | ||
4728 | return ret; | ||
4729 | } | ||
4730 | |||
4731 | /** | ||
4732 | * ext4_trim_all_free -- function to trim all free space in alloc. group | ||
4733 | * @sb: super block for file system | ||
4734 | * @e4b: ext4 buddy | ||
4735 | * @start: first group block to examine | ||
4736 | * @max: last group block to examine | ||
4737 | * @minblocks: minimum extent block count | ||
4738 | * | ||
4739 | * ext4_trim_all_free walks through group's buddy bitmap searching for free | ||
4740 | * extents. When the free block is found, ext4_trim_extent is called to TRIM | ||
4741 | * the extent. | ||
4742 | * | ||
4743 | * | ||
4744 | * ext4_trim_all_free walks through group's block bitmap searching for free | ||
4745 | * extents. When the free extent is found, mark it as used in group buddy | ||
4746 | * bitmap. Then issue a TRIM command on this extent and free the extent in | ||
4747 | * the group buddy bitmap. This is done until whole group is scanned. | ||
4748 | */ | ||
4749 | ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | ||
4750 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | ||
4751 | { | ||
4752 | void *bitmap; | ||
4753 | ext4_grpblk_t next, count = 0; | ||
4754 | ext4_group_t group; | ||
4755 | int ret = 0; | ||
4756 | |||
4757 | BUG_ON(e4b == NULL); | ||
4758 | |||
4759 | bitmap = e4b->bd_bitmap; | ||
4760 | group = e4b->bd_group; | ||
4761 | start = (e4b->bd_info->bb_first_free > start) ? | ||
4762 | e4b->bd_info->bb_first_free : start; | ||
4763 | ext4_lock_group(sb, group); | ||
4764 | |||
4765 | while (start < max) { | ||
4766 | start = mb_find_next_zero_bit(bitmap, max, start); | ||
4767 | if (start >= max) | ||
4768 | break; | ||
4769 | next = mb_find_next_bit(bitmap, max, start); | ||
4770 | |||
4771 | if ((next - start) >= minblocks) { | ||
4772 | ret = ext4_trim_extent(sb, start, | ||
4773 | next - start, group, e4b); | ||
4774 | if (ret < 0) | ||
4775 | break; | ||
4776 | count += next - start; | ||
4777 | } | ||
4778 | start = next + 1; | ||
4779 | |||
4780 | if (fatal_signal_pending(current)) { | ||
4781 | count = -ERESTARTSYS; | ||
4782 | break; | ||
4783 | } | ||
4784 | |||
4785 | if (need_resched()) { | ||
4786 | ext4_unlock_group(sb, group); | ||
4787 | cond_resched(); | ||
4788 | ext4_lock_group(sb, group); | ||
4789 | } | ||
4790 | |||
4791 | if ((e4b->bd_info->bb_free - count) < minblocks) | ||
4792 | break; | ||
4793 | } | ||
4794 | ext4_unlock_group(sb, group); | ||
4795 | |||
4796 | ext4_debug("trimmed %d blocks in the group %d\n", | ||
4797 | count, group); | ||
4798 | |||
4799 | if (ret < 0) | ||
4800 | count = ret; | ||
4801 | |||
4802 | return count; | ||
4803 | } | ||
4804 | |||
4805 | /** | ||
4806 | * ext4_trim_fs() -- trim ioctl handle function | ||
4807 | * @sb: superblock for filesystem | ||
4808 | * @range: fstrim_range structure | ||
4809 | * | ||
4810 | * start: First Byte to trim | ||
4811 | * len: number of Bytes to trim from start | ||
4812 | * minlen: minimum extent length in Bytes | ||
4813 | * ext4_trim_fs goes through all allocation groups containing Bytes from | ||
4814 | * start to start+len. For each such a group ext4_trim_all_free function | ||
4815 | * is invoked to trim all free space. | ||
4816 | */ | ||
4817 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
4818 | { | ||
4819 | struct ext4_buddy e4b; | ||
4820 | ext4_group_t first_group, last_group; | ||
4821 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
4822 | ext4_grpblk_t cnt = 0, first_block, last_block; | ||
4823 | uint64_t start, len, minlen, trimmed; | ||
4824 | int ret = 0; | ||
4825 | |||
4826 | start = range->start >> sb->s_blocksize_bits; | ||
4827 | len = range->len >> sb->s_blocksize_bits; | ||
4828 | minlen = range->minlen >> sb->s_blocksize_bits; | ||
4829 | trimmed = 0; | ||
4830 | |||
4831 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | ||
4832 | return -EINVAL; | ||
4833 | |||
4834 | /* Determine first and last group to examine based on start and len */ | ||
4835 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | ||
4836 | &first_group, &first_block); | ||
4837 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | ||
4838 | &last_group, &last_block); | ||
4839 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
4840 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | ||
4841 | |||
4842 | if (first_group > last_group) | ||
4843 | return -EINVAL; | ||
4844 | |||
4845 | for (group = first_group; group <= last_group; group++) { | ||
4846 | ret = ext4_mb_load_buddy(sb, group, &e4b); | ||
4847 | if (ret) { | ||
4848 | ext4_error(sb, "Error in loading buddy " | ||
4849 | "information for %u", group); | ||
4850 | break; | ||
4851 | } | ||
4852 | |||
4853 | if (len >= EXT4_BLOCKS_PER_GROUP(sb)) | ||
4854 | len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); | ||
4855 | else | ||
4856 | last_block = len; | ||
4857 | |||
4858 | if (e4b.bd_info->bb_free >= minlen) { | ||
4859 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | ||
4860 | last_block, minlen); | ||
4861 | if (cnt < 0) { | ||
4862 | ret = cnt; | ||
4863 | ext4_mb_unload_buddy(&e4b); | ||
4864 | break; | ||
4865 | } | ||
4866 | } | ||
4867 | ext4_mb_unload_buddy(&e4b); | ||
4868 | trimmed += cnt; | ||
4869 | first_block = 0; | ||
4870 | } | ||
4871 | range->len = trimmed * sb->s_blocksize; | ||
4872 | |||
4873 | return ret; | ||
4874 | } | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 6f3a27ec30bf..25f3a974b725 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
376 | * We have the extent map build with the tmp inode. | 376 | * We have the extent map build with the tmp inode. |
377 | * Now copy the i_data across | 377 | * Now copy the i_data across |
378 | */ | 378 | */ |
379 | ei->i_flags |= EXT4_EXTENTS_FL; | 379 | ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS); |
380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); | 380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); |
381 | 381 | ||
382 | /* | 382 | /* |
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
412 | struct buffer_head *bh; | 412 | struct buffer_head *bh; |
413 | struct ext4_extent_header *eh; | 413 | struct ext4_extent_header *eh; |
414 | 414 | ||
415 | block = idx_pblock(ix); | 415 | block = ext4_idx_pblock(ix); |
416 | bh = sb_bread(inode->i_sb, block); | 416 | bh = sb_bread(inode->i_sb, block); |
417 | if (!bh) | 417 | if (!bh) |
418 | return -EIO; | 418 | return -EIO; |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 52abfa12762a..b9f3e7862f13 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | 85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { |
86 | /* leaf block */ | 86 | /* leaf block */ |
87 | *extent = ++path[ppos].p_ext; | 87 | *extent = ++path[ppos].p_ext; |
88 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 88 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
96 | 96 | ||
97 | /* index block */ | 97 | /* index block */ |
98 | path[ppos].p_idx++; | 98 | path[ppos].p_idx++; |
99 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 99 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
100 | if (path[ppos+1].p_bh) | 100 | if (path[ppos+1].p_bh) |
101 | brelse(path[ppos+1].p_bh); | 101 | brelse(path[ppos+1].p_bh); |
102 | path[ppos+1].p_bh = | 102 | path[ppos+1].p_bh = |
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
111 | path[cur_ppos].p_idx = | 111 | path[cur_ppos].p_idx = |
112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); | 112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); |
113 | path[cur_ppos].p_block = | 113 | path[cur_ppos].p_block = |
114 | idx_pblock(path[cur_ppos].p_idx); | 114 | ext4_idx_pblock(path[cur_ppos].p_idx); |
115 | if (path[cur_ppos+1].p_bh) | 115 | if (path[cur_ppos+1].p_bh) |
116 | brelse(path[cur_ppos+1].p_bh); | 116 | brelse(path[cur_ppos+1].p_bh); |
117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, | 117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, |
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
133 | path[leaf_ppos].p_ext = *extent = | 133 | path[leaf_ppos].p_ext = *extent = |
134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | 134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); |
135 | path[leaf_ppos].p_block = | 135 | path[leaf_ppos].p_block = |
136 | ext_pblock(path[leaf_ppos].p_ext); | 136 | ext4_ext_pblock(path[leaf_ppos].p_ext); |
137 | return 0; | 137 | return 0; |
138 | } | 138 | } |
139 | } | 139 | } |
@@ -148,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
148 | */ | 148 | */ |
149 | static int | 149 | static int |
150 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | 150 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, |
151 | const char *function) | 151 | const char *function, unsigned int line) |
152 | { | 152 | { |
153 | int ret = 0; | 153 | int ret = 0; |
154 | 154 | ||
155 | if (inode1 == NULL) { | 155 | if (inode1 == NULL) { |
156 | __ext4_error(inode2->i_sb, function, | 156 | __ext4_error(inode2->i_sb, function, line, |
157 | "Both inodes should not be NULL: " | 157 | "Both inodes should not be NULL: " |
158 | "inode1 NULL inode2 %lu", inode2->i_ino); | 158 | "inode1 NULL inode2 %lu", inode2->i_ino); |
159 | ret = -EIO; | 159 | ret = -EIO; |
160 | } else if (inode2 == NULL) { | 160 | } else if (inode2 == NULL) { |
161 | __ext4_error(inode1->i_sb, function, | 161 | __ext4_error(inode1->i_sb, function, line, |
162 | "Both inodes should not be NULL: " | 162 | "Both inodes should not be NULL: " |
163 | "inode1 %lu inode2 NULL", inode1->i_ino); | 163 | "inode1 %lu inode2 NULL", inode1->i_ino); |
164 | ret = -EIO; | 164 | ret = -EIO; |
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
249 | */ | 249 | */ |
250 | o_end->ee_block = end_ext->ee_block; | 250 | o_end->ee_block = end_ext->ee_block; |
251 | o_end->ee_len = end_ext->ee_len; | 251 | o_end->ee_len = end_ext->ee_len; |
252 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 252 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
253 | } | 253 | } |
254 | 254 | ||
255 | o_start->ee_len = start_ext->ee_len; | 255 | o_start->ee_len = start_ext->ee_len; |
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
276 | */ | 276 | */ |
277 | o_end->ee_block = end_ext->ee_block; | 277 | o_end->ee_block = end_ext->ee_block; |
278 | o_end->ee_len = end_ext->ee_len; | 278 | o_end->ee_len = end_ext->ee_len; |
279 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 279 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
280 | 280 | ||
281 | /* | 281 | /* |
282 | * Set 0 to the extent block if new_ext was | 282 | * Set 0 to the extent block if new_ext was |
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start, | |||
361 | /* Insert new entry */ | 361 | /* Insert new entry */ |
362 | if (new_ext->ee_len) { | 362 | if (new_ext->ee_len) { |
363 | o_start[i] = *new_ext; | 363 | o_start[i] = *new_ext; |
364 | ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); | 364 | ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); |
365 | } | 365 | } |
366 | 366 | ||
367 | /* Insert end entry */ | 367 | /* Insert end entry */ |
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
488 | start_ext.ee_len = end_ext.ee_len = 0; | 488 | start_ext.ee_len = end_ext.ee_len = 0; |
489 | 489 | ||
490 | new_ext.ee_block = cpu_to_le32(*from); | 490 | new_ext.ee_block = cpu_to_le32(*from); |
491 | ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); | 491 | ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); |
492 | new_ext.ee_len = dext->ee_len; | 492 | new_ext.ee_len = dext->ee_len; |
493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | 493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); |
494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | 494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; |
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
553 | copy_extent_status(oext, &end_ext); | 553 | copy_extent_status(oext, &end_ext); |
554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); | 554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); |
555 | ext4_ext_store_pblock(&end_ext, | 555 | ext4_ext_store_pblock(&end_ext, |
556 | (ext_pblock(o_end) + oext_alen - end_ext_alen)); | 556 | (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); |
557 | end_ext.ee_block = | 557 | end_ext.ee_block = |
558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + | 558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + |
559 | oext_alen - end_ext_alen); | 559 | oext_alen - end_ext_alen); |
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
604 | /* When tmp_dext is too large, pick up the target range. */ | 604 | /* When tmp_dext is too large, pick up the target range. */ |
605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | 605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); |
606 | 606 | ||
607 | ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); | 607 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); |
608 | tmp_dext->ee_block = | 608 | tmp_dext->ee_block = |
609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); | 609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); |
610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); | 610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); |
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
613 | tmp_dext->ee_len = cpu_to_le16(max_count); | 613 | tmp_dext->ee_len = cpu_to_le16(max_count); |
614 | 614 | ||
615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); | 615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); |
616 | ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); | 616 | ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); |
617 | 617 | ||
618 | /* Adjust extent length if donor extent is larger than orig */ | 618 | /* Adjust extent length if donor extent is larger than orig */ |
619 | if (ext4_ext_get_actual_len(tmp_dext) > | 619 | if (ext4_ext_get_actual_len(tmp_dext) > |
@@ -1084,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1084 | 1084 | ||
1085 | BUG_ON(inode1 == NULL && inode2 == NULL); | 1085 | BUG_ON(inode1 == NULL && inode2 == NULL); |
1086 | 1086 | ||
1087 | ret = mext_check_null_inode(inode1, inode2, __func__); | 1087 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); |
1088 | if (ret < 0) | 1088 | if (ret < 0) |
1089 | goto out; | 1089 | goto out; |
1090 | 1090 | ||
@@ -1121,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | |||
1121 | 1121 | ||
1122 | BUG_ON(inode1 == NULL && inode2 == NULL); | 1122 | BUG_ON(inode1 == NULL && inode2 == NULL); |
1123 | 1123 | ||
1124 | ret = mext_check_null_inode(inode1, inode2, __func__); | 1124 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); |
1125 | if (ret < 0) | 1125 | if (ret < 0) |
1126 | goto out; | 1126 | goto out; |
1127 | 1127 | ||
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a43e6617b351..92203b8a099f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, | |||
179 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 179 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
180 | struct inode *inode); | 180 | struct inode *inode); |
181 | 181 | ||
182 | unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) | ||
183 | { | ||
184 | unsigned len = le16_to_cpu(dlen); | ||
185 | |||
186 | if (len == EXT4_MAX_REC_LEN || len == 0) | ||
187 | return blocksize; | ||
188 | return (len & 65532) | ((len & 3) << 16); | ||
189 | } | ||
190 | |||
191 | __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | ||
192 | { | ||
193 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) | ||
194 | BUG(); | ||
195 | if (len < 65536) | ||
196 | return cpu_to_le16(len); | ||
197 | if (len == blocksize) { | ||
198 | if (blocksize == 65536) | ||
199 | return cpu_to_le16(EXT4_MAX_REC_LEN); | ||
200 | else | ||
201 | return cpu_to_le16(0); | ||
202 | } | ||
203 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); | ||
204 | } | ||
205 | |||
206 | /* | 182 | /* |
207 | * p is at least 6 bytes before the end of page | 183 | * p is at least 6 bytes before the end of page |
208 | */ | 184 | */ |
@@ -605,7 +581,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
605 | dir->i_sb->s_blocksize - | 581 | dir->i_sb->s_blocksize - |
606 | EXT4_DIR_REC_LEN(0)); | 582 | EXT4_DIR_REC_LEN(0)); |
607 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { | 583 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { |
608 | if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, | 584 | if (!ext4_check_dir_entry(dir, de, bh, |
609 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | 585 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) |
610 | +((char *)de - bh->b_data))) { | 586 | +((char *)de - bh->b_data))) { |
611 | /* On error, skip the f_pos to the next block. */ | 587 | /* On error, skip the f_pos to the next block. */ |
@@ -844,8 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh, | |||
844 | if ((char *) de + namelen <= dlimit && | 820 | if ((char *) de + namelen <= dlimit && |
845 | ext4_match (namelen, name, de)) { | 821 | ext4_match (namelen, name, de)) { |
846 | /* found a match - just to be sure, do a full check */ | 822 | /* found a match - just to be sure, do a full check */ |
847 | if (!ext4_check_dir_entry("ext4_find_entry", | 823 | if (!ext4_check_dir_entry(dir, de, bh, offset)) |
848 | dir, de, bh, offset)) | ||
849 | return -1; | 824 | return -1; |
850 | *res_dir = de; | 825 | *res_dir = de; |
851 | return 1; | 826 | return 1; |
@@ -881,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
881 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; | 856 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
882 | struct buffer_head *bh, *ret = NULL; | 857 | struct buffer_head *bh, *ret = NULL; |
883 | ext4_lblk_t start, block, b; | 858 | ext4_lblk_t start, block, b; |
859 | const u8 *name = d_name->name; | ||
884 | int ra_max = 0; /* Number of bh's in the readahead | 860 | int ra_max = 0; /* Number of bh's in the readahead |
885 | buffer, bh_use[] */ | 861 | buffer, bh_use[] */ |
886 | int ra_ptr = 0; /* Current index into readahead | 862 | int ra_ptr = 0; /* Current index into readahead |
@@ -895,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
895 | namelen = d_name->len; | 871 | namelen = d_name->len; |
896 | if (namelen > EXT4_NAME_LEN) | 872 | if (namelen > EXT4_NAME_LEN) |
897 | return NULL; | 873 | return NULL; |
874 | if ((namelen <= 2) && (name[0] == '.') && | ||
875 | (name[1] == '.' || name[1] == '0')) { | ||
876 | /* | ||
877 | * "." or ".." will only be in the first block | ||
878 | * NFS may look up ".."; "." should be handled by the VFS | ||
879 | */ | ||
880 | block = start = 0; | ||
881 | nblocks = 1; | ||
882 | goto restart; | ||
883 | } | ||
898 | if (is_dx(dir)) { | 884 | if (is_dx(dir)) { |
899 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); | 885 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
900 | /* | 886 | /* |
@@ -985,55 +971,35 @@ cleanup_and_exit: | |||
985 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, | 971 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
986 | struct ext4_dir_entry_2 **res_dir, int *err) | 972 | struct ext4_dir_entry_2 **res_dir, int *err) |
987 | { | 973 | { |
988 | struct super_block * sb; | 974 | struct super_block * sb = dir->i_sb; |
989 | struct dx_hash_info hinfo; | 975 | struct dx_hash_info hinfo; |
990 | u32 hash; | ||
991 | struct dx_frame frames[2], *frame; | 976 | struct dx_frame frames[2], *frame; |
992 | struct ext4_dir_entry_2 *de, *top; | ||
993 | struct buffer_head *bh; | 977 | struct buffer_head *bh; |
994 | ext4_lblk_t block; | 978 | ext4_lblk_t block; |
995 | int retval; | 979 | int retval; |
996 | int namelen = d_name->len; | ||
997 | const u8 *name = d_name->name; | ||
998 | 980 | ||
999 | sb = dir->i_sb; | 981 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
1000 | /* NFS may look up ".." - look at dx_root directory block */ | 982 | return NULL; |
1001 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | ||
1002 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) | ||
1003 | return NULL; | ||
1004 | } else { | ||
1005 | frame = frames; | ||
1006 | frame->bh = NULL; /* for dx_release() */ | ||
1007 | frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ | ||
1008 | dx_set_block(frame->at, 0); /* dx_root block is 0 */ | ||
1009 | } | ||
1010 | hash = hinfo.hash; | ||
1011 | do { | 983 | do { |
1012 | block = dx_get_block(frame->at); | 984 | block = dx_get_block(frame->at); |
1013 | if (!(bh = ext4_bread (NULL,dir, block, 0, err))) | 985 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) |
1014 | goto errout; | 986 | goto errout; |
1015 | de = (struct ext4_dir_entry_2 *) bh->b_data; | ||
1016 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - | ||
1017 | EXT4_DIR_REC_LEN(0)); | ||
1018 | for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) { | ||
1019 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) | ||
1020 | + ((char *) de - bh->b_data); | ||
1021 | |||
1022 | if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) { | ||
1023 | brelse(bh); | ||
1024 | *err = ERR_BAD_DX_DIR; | ||
1025 | goto errout; | ||
1026 | } | ||
1027 | 987 | ||
1028 | if (ext4_match(namelen, name, de)) { | 988 | retval = search_dirblock(bh, dir, d_name, |
1029 | *res_dir = de; | 989 | block << EXT4_BLOCK_SIZE_BITS(sb), |
1030 | dx_release(frames); | 990 | res_dir); |
1031 | return bh; | 991 | if (retval == 1) { /* Success! */ |
1032 | } | 992 | dx_release(frames); |
993 | return bh; | ||
1033 | } | 994 | } |
1034 | brelse(bh); | 995 | brelse(bh); |
996 | if (retval == -1) { | ||
997 | *err = ERR_BAD_DX_DIR; | ||
998 | goto errout; | ||
999 | } | ||
1000 | |||
1035 | /* Check to see if we should continue to search */ | 1001 | /* Check to see if we should continue to search */ |
1036 | retval = ext4_htree_next_block(dir, hash, frame, | 1002 | retval = ext4_htree_next_block(dir, hinfo.hash, frame, |
1037 | frames, NULL); | 1003 | frames, NULL); |
1038 | if (retval < 0) { | 1004 | if (retval < 0) { |
1039 | ext4_warning(sb, | 1005 | ext4_warning(sb, |
@@ -1088,7 +1054,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1088 | struct dentry *ext4_get_parent(struct dentry *child) | 1054 | struct dentry *ext4_get_parent(struct dentry *child) |
1089 | { | 1055 | { |
1090 | __u32 ino; | 1056 | __u32 ino; |
1091 | struct inode *inode; | ||
1092 | static const struct qstr dotdot = { | 1057 | static const struct qstr dotdot = { |
1093 | .name = "..", | 1058 | .name = "..", |
1094 | .len = 2, | 1059 | .len = 2, |
@@ -1097,7 +1062,6 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1097 | struct buffer_head *bh; | 1062 | struct buffer_head *bh; |
1098 | 1063 | ||
1099 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); | 1064 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); |
1100 | inode = NULL; | ||
1101 | if (!bh) | 1065 | if (!bh) |
1102 | return ERR_PTR(-ENOENT); | 1066 | return ERR_PTR(-ENOENT); |
1103 | ino = le32_to_cpu(de->inode); | 1067 | ino = le32_to_cpu(de->inode); |
@@ -1305,8 +1269,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1305 | de = (struct ext4_dir_entry_2 *)bh->b_data; | 1269 | de = (struct ext4_dir_entry_2 *)bh->b_data; |
1306 | top = bh->b_data + blocksize - reclen; | 1270 | top = bh->b_data + blocksize - reclen; |
1307 | while ((char *) de <= top) { | 1271 | while ((char *) de <= top) { |
1308 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1272 | if (!ext4_check_dir_entry(dir, de, bh, offset)) |
1309 | bh, offset)) | ||
1310 | return -EIO; | 1273 | return -EIO; |
1311 | if (ext4_match(namelen, name, de)) | 1274 | if (ext4_match(namelen, name, de)) |
1312 | return -EEXIST; | 1275 | return -EEXIST; |
@@ -1673,7 +1636,7 @@ static int ext4_delete_entry(handle_t *handle, | |||
1673 | pde = NULL; | 1636 | pde = NULL; |
1674 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1637 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1675 | while (i < bh->b_size) { | 1638 | while (i < bh->b_size) { |
1676 | if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i)) | 1639 | if (!ext4_check_dir_entry(dir, de, bh, i)) |
1677 | return -EIO; | 1640 | return -EIO; |
1678 | if (de == de_del) { | 1641 | if (de == de_del) { |
1679 | BUFFER_TRACE(bh, "get_write_access"); | 1642 | BUFFER_TRACE(bh, "get_write_access"); |
@@ -1956,7 +1919,7 @@ static int empty_dir(struct inode *inode) | |||
1956 | } | 1919 | } |
1957 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1920 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1958 | } | 1921 | } |
1959 | if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) { | 1922 | if (!ext4_check_dir_entry(inode, de, bh, offset)) { |
1960 | de = (struct ext4_dir_entry_2 *)(bh->b_data + | 1923 | de = (struct ext4_dir_entry_2 *)(bh->b_data + |
1961 | sb->s_blocksize); | 1924 | sb->s_blocksize); |
1962 | offset = (offset | (sb->s_blocksize - 1)) + 1; | 1925 | offset = (offset | (sb->s_blocksize - 1)) + 1; |
@@ -2340,7 +2303,7 @@ retry: | |||
2340 | 2303 | ||
2341 | inode->i_ctime = ext4_current_time(inode); | 2304 | inode->i_ctime = ext4_current_time(inode); |
2342 | ext4_inc_count(handle, inode); | 2305 | ext4_inc_count(handle, inode); |
2343 | atomic_inc(&inode->i_count); | 2306 | ihold(inode); |
2344 | 2307 | ||
2345 | err = ext4_add_entry(handle, dentry, inode); | 2308 | err = ext4_add_entry(handle, dentry, inode); |
2346 | if (!err) { | 2309 | if (!err) { |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c new file mode 100644 index 000000000000..46a7d6a9d976 --- /dev/null +++ b/fs/ext4/page-io.c | |||
@@ -0,0 +1,430 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/page-io.c | ||
3 | * | ||
4 | * This contains the new page_io functions for ext4 | ||
5 | * | ||
6 | * Written by Theodore Ts'o, 2010. | ||
7 | */ | ||
8 | |||
9 | #include <linux/module.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/time.h> | ||
12 | #include <linux/jbd2.h> | ||
13 | #include <linux/highuid.h> | ||
14 | #include <linux/pagemap.h> | ||
15 | #include <linux/quotaops.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/buffer_head.h> | ||
18 | #include <linux/writeback.h> | ||
19 | #include <linux/pagevec.h> | ||
20 | #include <linux/mpage.h> | ||
21 | #include <linux/namei.h> | ||
22 | #include <linux/uio.h> | ||
23 | #include <linux/bio.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/slab.h> | ||
27 | |||
28 | #include "ext4_jbd2.h" | ||
29 | #include "xattr.h" | ||
30 | #include "acl.h" | ||
31 | #include "ext4_extents.h" | ||
32 | |||
33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | ||
34 | |||
35 | int __init ext4_init_pageio(void) | ||
36 | { | ||
37 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | ||
38 | if (io_page_cachep == NULL) | ||
39 | return -ENOMEM; | ||
40 | io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); | ||
41 | if (io_page_cachep == NULL) { | ||
42 | kmem_cache_destroy(io_page_cachep); | ||
43 | return -ENOMEM; | ||
44 | } | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | void ext4_exit_pageio(void) | ||
50 | { | ||
51 | kmem_cache_destroy(io_end_cachep); | ||
52 | kmem_cache_destroy(io_page_cachep); | ||
53 | } | ||
54 | |||
55 | void ext4_free_io_end(ext4_io_end_t *io) | ||
56 | { | ||
57 | int i; | ||
58 | |||
59 | BUG_ON(!io); | ||
60 | if (io->page) | ||
61 | put_page(io->page); | ||
62 | for (i = 0; i < io->num_io_pages; i++) { | ||
63 | if (--io->pages[i]->p_count == 0) { | ||
64 | struct page *page = io->pages[i]->p_page; | ||
65 | |||
66 | end_page_writeback(page); | ||
67 | put_page(page); | ||
68 | kmem_cache_free(io_page_cachep, io->pages[i]); | ||
69 | } | ||
70 | } | ||
71 | io->num_io_pages = 0; | ||
72 | iput(io->inode); | ||
73 | kmem_cache_free(io_end_cachep, io); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * check a range of space and convert unwritten extents to written. | ||
78 | */ | ||
79 | int ext4_end_io_nolock(ext4_io_end_t *io) | ||
80 | { | ||
81 | struct inode *inode = io->inode; | ||
82 | loff_t offset = io->offset; | ||
83 | ssize_t size = io->size; | ||
84 | int ret = 0; | ||
85 | |||
86 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
87 | "list->prev 0x%p\n", | ||
88 | io, inode->i_ino, io->list.next, io->list.prev); | ||
89 | |||
90 | if (list_empty(&io->list)) | ||
91 | return ret; | ||
92 | |||
93 | if (!(io->flag & EXT4_IO_END_UNWRITTEN)) | ||
94 | return ret; | ||
95 | |||
96 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
97 | if (ret < 0) { | ||
98 | printk(KERN_EMERG "%s: failed to convert unwritten " | ||
99 | "extents to written extents, error is %d " | ||
100 | "io is still on inode %lu aio dio list\n", | ||
101 | __func__, ret, inode->i_ino); | ||
102 | return ret; | ||
103 | } | ||
104 | |||
105 | if (io->iocb) | ||
106 | aio_complete(io->iocb, io->result, 0); | ||
107 | /* clear the DIO AIO unwritten flag */ | ||
108 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
109 | return ret; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
114 | */ | ||
115 | static void ext4_end_io_work(struct work_struct *work) | ||
116 | { | ||
117 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
118 | struct inode *inode = io->inode; | ||
119 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
120 | unsigned long flags; | ||
121 | int ret; | ||
122 | |||
123 | mutex_lock(&inode->i_mutex); | ||
124 | ret = ext4_end_io_nolock(io); | ||
125 | if (ret < 0) { | ||
126 | mutex_unlock(&inode->i_mutex); | ||
127 | return; | ||
128 | } | ||
129 | |||
130 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
131 | if (!list_empty(&io->list)) | ||
132 | list_del_init(&io->list); | ||
133 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
134 | mutex_unlock(&inode->i_mutex); | ||
135 | ext4_free_io_end(io); | ||
136 | } | ||
137 | |||
138 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | ||
139 | { | ||
140 | ext4_io_end_t *io = NULL; | ||
141 | |||
142 | io = kmem_cache_alloc(io_end_cachep, flags); | ||
143 | if (io) { | ||
144 | memset(io, 0, sizeof(*io)); | ||
145 | io->inode = igrab(inode); | ||
146 | BUG_ON(!io->inode); | ||
147 | INIT_WORK(&io->work, ext4_end_io_work); | ||
148 | INIT_LIST_HEAD(&io->list); | ||
149 | } | ||
150 | return io; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Print an buffer I/O error compatible with the fs/buffer.c. This | ||
155 | * provides compatibility with dmesg scrapers that look for a specific | ||
156 | * buffer I/O error message. We really need a unified error reporting | ||
157 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
158 | * probably not going to happen in my lifetime, due to LKML politics... | ||
159 | */ | ||
160 | static void buffer_io_error(struct buffer_head *bh) | ||
161 | { | ||
162 | char b[BDEVNAME_SIZE]; | ||
163 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
164 | bdevname(bh->b_bdev, b), | ||
165 | (unsigned long long)bh->b_blocknr); | ||
166 | } | ||
167 | |||
168 | static void ext4_end_bio(struct bio *bio, int error) | ||
169 | { | ||
170 | ext4_io_end_t *io_end = bio->bi_private; | ||
171 | struct workqueue_struct *wq; | ||
172 | struct inode *inode; | ||
173 | unsigned long flags; | ||
174 | ext4_fsblk_t err_block; | ||
175 | int i; | ||
176 | |||
177 | BUG_ON(!io_end); | ||
178 | inode = io_end->inode; | ||
179 | bio->bi_private = NULL; | ||
180 | bio->bi_end_io = NULL; | ||
181 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
182 | error = 0; | ||
183 | err_block = bio->bi_sector >> (inode->i_blkbits - 9); | ||
184 | bio_put(bio); | ||
185 | |||
186 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) { | ||
187 | pr_err("sb umounted, discard end_io request for inode %lu\n", | ||
188 | io_end->inode->i_ino); | ||
189 | ext4_free_io_end(io_end); | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | if (error) { | ||
194 | io_end->flag |= EXT4_IO_END_ERROR; | ||
195 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | ||
196 | "(offset %llu size %ld starting block %llu)", | ||
197 | inode->i_ino, | ||
198 | (unsigned long long) io_end->offset, | ||
199 | (long) io_end->size, | ||
200 | (unsigned long long) err_block); | ||
201 | } | ||
202 | |||
203 | for (i = 0; i < io_end->num_io_pages; i++) { | ||
204 | struct page *page = io_end->pages[i]->p_page; | ||
205 | struct buffer_head *bh, *head; | ||
206 | int partial_write = 0; | ||
207 | |||
208 | head = page_buffers(page); | ||
209 | if (error) | ||
210 | SetPageError(page); | ||
211 | BUG_ON(!head); | ||
212 | if (head->b_size == PAGE_CACHE_SIZE) | ||
213 | clear_buffer_dirty(head); | ||
214 | else { | ||
215 | loff_t offset; | ||
216 | loff_t io_end_offset = io_end->offset + io_end->size; | ||
217 | |||
218 | offset = (sector_t) page->index << PAGE_CACHE_SHIFT; | ||
219 | bh = head; | ||
220 | do { | ||
221 | if ((offset >= io_end->offset) && | ||
222 | (offset+bh->b_size <= io_end_offset)) { | ||
223 | if (error) | ||
224 | buffer_io_error(bh); | ||
225 | |||
226 | clear_buffer_dirty(bh); | ||
227 | } | ||
228 | if (buffer_delay(bh)) | ||
229 | partial_write = 1; | ||
230 | else if (!buffer_mapped(bh)) | ||
231 | clear_buffer_dirty(bh); | ||
232 | else if (buffer_dirty(bh)) | ||
233 | partial_write = 1; | ||
234 | offset += bh->b_size; | ||
235 | bh = bh->b_this_page; | ||
236 | } while (bh != head); | ||
237 | } | ||
238 | |||
239 | if (--io_end->pages[i]->p_count == 0) { | ||
240 | struct page *page = io_end->pages[i]->p_page; | ||
241 | |||
242 | end_page_writeback(page); | ||
243 | put_page(page); | ||
244 | kmem_cache_free(io_page_cachep, io_end->pages[i]); | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * If this is a partial write which happened to make | ||
249 | * all buffers uptodate then we can optimize away a | ||
250 | * bogus readpage() for the next read(). Here we | ||
251 | * 'discover' whether the page went uptodate as a | ||
252 | * result of this (potentially partial) write. | ||
253 | */ | ||
254 | if (!partial_write) | ||
255 | SetPageUptodate(page); | ||
256 | } | ||
257 | |||
258 | io_end->num_io_pages = 0; | ||
259 | |||
260 | /* Add the io_end to per-inode completed io list*/ | ||
261 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
262 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
263 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
264 | |||
265 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
266 | /* queue the work to convert unwritten extents to written */ | ||
267 | queue_work(wq, &io_end->work); | ||
268 | } | ||
269 | |||
270 | void ext4_io_submit(struct ext4_io_submit *io) | ||
271 | { | ||
272 | struct bio *bio = io->io_bio; | ||
273 | |||
274 | if (bio) { | ||
275 | bio_get(io->io_bio); | ||
276 | submit_bio(io->io_op, io->io_bio); | ||
277 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); | ||
278 | bio_put(io->io_bio); | ||
279 | } | ||
280 | io->io_bio = 0; | ||
281 | io->io_op = 0; | ||
282 | io->io_end = 0; | ||
283 | } | ||
284 | |||
285 | static int io_submit_init(struct ext4_io_submit *io, | ||
286 | struct inode *inode, | ||
287 | struct writeback_control *wbc, | ||
288 | struct buffer_head *bh) | ||
289 | { | ||
290 | ext4_io_end_t *io_end; | ||
291 | struct page *page = bh->b_page; | ||
292 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | ||
293 | struct bio *bio; | ||
294 | |||
295 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
296 | if (!io_end) | ||
297 | return -ENOMEM; | ||
298 | do { | ||
299 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
300 | nvecs >>= 1; | ||
301 | } while (bio == NULL); | ||
302 | |||
303 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | ||
304 | bio->bi_bdev = bh->b_bdev; | ||
305 | bio->bi_private = io->io_end = io_end; | ||
306 | bio->bi_end_io = ext4_end_bio; | ||
307 | |||
308 | io_end->inode = inode; | ||
309 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
310 | |||
311 | io->io_bio = bio; | ||
312 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? | ||
313 | WRITE_SYNC_PLUG : WRITE); | ||
314 | io->io_next_block = bh->b_blocknr; | ||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | static int io_submit_add_bh(struct ext4_io_submit *io, | ||
319 | struct ext4_io_page *io_page, | ||
320 | struct inode *inode, | ||
321 | struct writeback_control *wbc, | ||
322 | struct buffer_head *bh) | ||
323 | { | ||
324 | ext4_io_end_t *io_end; | ||
325 | int ret; | ||
326 | |||
327 | if (buffer_new(bh)) { | ||
328 | clear_buffer_new(bh); | ||
329 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | ||
330 | } | ||
331 | |||
332 | if (!buffer_mapped(bh) || buffer_delay(bh)) { | ||
333 | if (!buffer_mapped(bh)) | ||
334 | clear_buffer_dirty(bh); | ||
335 | if (io->io_bio) | ||
336 | ext4_io_submit(io); | ||
337 | return 0; | ||
338 | } | ||
339 | |||
340 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | ||
341 | submit_and_retry: | ||
342 | ext4_io_submit(io); | ||
343 | } | ||
344 | if (io->io_bio == NULL) { | ||
345 | ret = io_submit_init(io, inode, wbc, bh); | ||
346 | if (ret) | ||
347 | return ret; | ||
348 | } | ||
349 | io_end = io->io_end; | ||
350 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | ||
351 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | ||
352 | goto submit_and_retry; | ||
353 | if (buffer_uninit(bh)) | ||
354 | io->io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
355 | io->io_end->size += bh->b_size; | ||
356 | io->io_next_block++; | ||
357 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
358 | if (ret != bh->b_size) | ||
359 | goto submit_and_retry; | ||
360 | if ((io_end->num_io_pages == 0) || | ||
361 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { | ||
362 | io_end->pages[io_end->num_io_pages++] = io_page; | ||
363 | io_page->p_count++; | ||
364 | } | ||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | int ext4_bio_write_page(struct ext4_io_submit *io, | ||
369 | struct page *page, | ||
370 | int len, | ||
371 | struct writeback_control *wbc) | ||
372 | { | ||
373 | struct inode *inode = page->mapping->host; | ||
374 | unsigned block_start, block_end, blocksize; | ||
375 | struct ext4_io_page *io_page; | ||
376 | struct buffer_head *bh, *head; | ||
377 | int ret = 0; | ||
378 | |||
379 | blocksize = 1 << inode->i_blkbits; | ||
380 | |||
381 | BUG_ON(PageWriteback(page)); | ||
382 | set_page_writeback(page); | ||
383 | ClearPageError(page); | ||
384 | |||
385 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | ||
386 | if (!io_page) { | ||
387 | set_page_dirty(page); | ||
388 | unlock_page(page); | ||
389 | return -ENOMEM; | ||
390 | } | ||
391 | io_page->p_page = page; | ||
392 | io_page->p_count = 0; | ||
393 | get_page(page); | ||
394 | |||
395 | for (bh = head = page_buffers(page), block_start = 0; | ||
396 | bh != head || !block_start; | ||
397 | block_start = block_end, bh = bh->b_this_page) { | ||
398 | block_end = block_start + blocksize; | ||
399 | if (block_start >= len) { | ||
400 | clear_buffer_dirty(bh); | ||
401 | set_buffer_uptodate(bh); | ||
402 | continue; | ||
403 | } | ||
404 | ret = io_submit_add_bh(io, io_page, inode, wbc, bh); | ||
405 | if (ret) { | ||
406 | /* | ||
407 | * We only get here on ENOMEM. Not much else | ||
408 | * we can do but mark the page as dirty, and | ||
409 | * better luck next time. | ||
410 | */ | ||
411 | set_page_dirty(page); | ||
412 | break; | ||
413 | } | ||
414 | } | ||
415 | unlock_page(page); | ||
416 | /* | ||
417 | * If the page was truncated before we could do the writeback, | ||
418 | * or we had a memory allocation error while trying to write | ||
419 | * the first buffer head, we won't have submitted any pages for | ||
420 | * I/O. In that case we need to make sure we've cleared the | ||
421 | * PageWriteback bit from the page to prevent the system from | ||
422 | * wedging later on. | ||
423 | */ | ||
424 | if (io_page->p_count == 0) { | ||
425 | put_page(page); | ||
426 | end_page_writeback(page); | ||
427 | kmem_cache_free(io_page_cachep, io_page); | ||
428 | } | ||
429 | return ret; | ||
430 | } | ||
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6df797eb9aeb..dc963929de65 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
226 | } | 226 | } |
227 | 227 | ||
228 | /* Zero out all of the reserved backup group descriptor table blocks */ | 228 | /* Zero out all of the reserved backup group descriptor table blocks */ |
229 | for (i = 0, bit = gdblocks + 1, block = start + bit; | 229 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", |
230 | i < reserved_gdb; i++, block++, bit++) { | 230 | block, sbi->s_itb_per_group); |
231 | struct buffer_head *gdb; | 231 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
232 | 232 | GFP_NOFS); | |
233 | ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); | 233 | if (err) |
234 | 234 | goto exit_bh; | |
235 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | ||
236 | goto exit_bh; | ||
237 | 235 | ||
238 | if (IS_ERR(gdb = bclean(handle, sb, block))) { | ||
239 | err = PTR_ERR(gdb); | ||
240 | goto exit_bh; | ||
241 | } | ||
242 | ext4_handle_dirty_metadata(handle, NULL, gdb); | ||
243 | ext4_set_bit(bit, bh->b_data); | ||
244 | brelse(gdb); | ||
245 | } | ||
246 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | 236 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, |
247 | input->block_bitmap - start); | 237 | input->block_bitmap - start); |
248 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | 238 | ext4_set_bit(input->block_bitmap - start, bh->b_data); |
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
251 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | 241 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); |
252 | 242 | ||
253 | /* Zero out all of the inode table blocks */ | 243 | /* Zero out all of the inode table blocks */ |
254 | for (i = 0, block = input->inode_table, bit = block - start; | 244 | block = input->inode_table; |
255 | i < sbi->s_itb_per_group; i++, bit++, block++) { | 245 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", |
256 | struct buffer_head *it; | 246 | block, sbi->s_itb_per_group); |
257 | 247 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | |
258 | ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); | 248 | if (err) |
259 | 249 | goto exit_bh; | |
260 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | ||
261 | goto exit_bh; | ||
262 | |||
263 | if (IS_ERR(it = bclean(handle, sb, block))) { | ||
264 | err = PTR_ERR(it); | ||
265 | goto exit_bh; | ||
266 | } | ||
267 | ext4_handle_dirty_metadata(handle, NULL, it); | ||
268 | brelse(it); | ||
269 | ext4_set_bit(bit, bh->b_data); | ||
270 | } | ||
271 | 250 | ||
272 | if ((err = extend_or_restart_transaction(handle, 2, bh))) | 251 | if ((err = extend_or_restart_transaction(handle, 2, bh))) |
273 | goto exit_bh; | 252 | goto exit_bh; |
274 | 253 | ||
275 | mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); | 254 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, |
255 | bh->b_data); | ||
276 | ext4_handle_dirty_metadata(handle, NULL, bh); | 256 | ext4_handle_dirty_metadata(handle, NULL, bh); |
277 | brelse(bh); | 257 | brelse(bh); |
278 | /* Mark unused entries in inode bitmap used */ | 258 | /* Mark unused entries in inode bitmap used */ |
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
283 | goto exit_journal; | 263 | goto exit_journal; |
284 | } | 264 | } |
285 | 265 | ||
286 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 266 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
287 | bh->b_data); | 267 | bh->b_data); |
288 | ext4_handle_dirty_metadata(handle, NULL, bh); | 268 | ext4_handle_dirty_metadata(handle, NULL, bh); |
289 | exit_bh: | 269 | exit_bh: |
290 | brelse(bh); | 270 | brelse(bh); |
@@ -921,8 +901,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
921 | &sbi->s_flex_groups[flex_group].free_inodes); | 901 | &sbi->s_flex_groups[flex_group].free_inodes); |
922 | } | 902 | } |
923 | 903 | ||
924 | ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); | 904 | ext4_handle_dirty_super(handle, sb); |
925 | sb->s_dirt = 1; | ||
926 | 905 | ||
927 | exit_journal: | 906 | exit_journal: |
928 | mutex_unlock(&sbi->s_resize_lock); | 907 | mutex_unlock(&sbi->s_resize_lock); |
@@ -953,7 +932,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
953 | ext4_fsblk_t n_blocks_count) | 932 | ext4_fsblk_t n_blocks_count) |
954 | { | 933 | { |
955 | ext4_fsblk_t o_blocks_count; | 934 | ext4_fsblk_t o_blocks_count; |
956 | ext4_group_t o_groups_count; | ||
957 | ext4_grpblk_t last; | 935 | ext4_grpblk_t last; |
958 | ext4_grpblk_t add; | 936 | ext4_grpblk_t add; |
959 | struct buffer_head *bh; | 937 | struct buffer_head *bh; |
@@ -965,7 +943,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
965 | * yet: we're going to revalidate es->s_blocks_count after | 943 | * yet: we're going to revalidate es->s_blocks_count after |
966 | * taking the s_resize_lock below. */ | 944 | * taking the s_resize_lock below. */ |
967 | o_blocks_count = ext4_blocks_count(es); | 945 | o_blocks_count = ext4_blocks_count(es); |
968 | o_groups_count = EXT4_SB(sb)->s_groups_count; | ||
969 | 946 | ||
970 | if (test_opt(sb, DEBUG)) | 947 | if (test_opt(sb, DEBUG)) |
971 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", | 948 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", |
@@ -1045,13 +1022,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1045 | goto exit_put; | 1022 | goto exit_put; |
1046 | } | 1023 | } |
1047 | ext4_blocks_count_set(es, o_blocks_count + add); | 1024 | ext4_blocks_count_set(es, o_blocks_count + add); |
1048 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | ||
1049 | sb->s_dirt = 1; | ||
1050 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | 1025 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1051 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1026 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1052 | o_blocks_count + add); | 1027 | o_blocks_count + add); |
1053 | /* We add the blocks to the bitmap and set the group need init bit */ | 1028 | /* We add the blocks to the bitmap and set the group need init bit */ |
1054 | ext4_add_groupblocks(handle, sb, o_blocks_count, add); | 1029 | ext4_add_groupblocks(handle, sb, o_blocks_count, add); |
1030 | ext4_handle_dirty_super(handle, sb); | ||
1055 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | 1031 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, |
1056 | o_blocks_count + add); | 1032 | o_blocks_count + add); |
1057 | if ((err = ext4_journal_stop(handle))) | 1033 | if ((err = ext4_journal_stop(handle))) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4e8983a9811b..0348ce066592 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/blkdev.h> | 27 | #include <linux/blkdev.h> |
28 | #include <linux/parser.h> | 28 | #include <linux/parser.h> |
29 | #include <linux/smp_lock.h> | ||
30 | #include <linux/buffer_head.h> | 29 | #include <linux/buffer_head.h> |
31 | #include <linux/exportfs.h> | 30 | #include <linux/exportfs.h> |
32 | #include <linux/vfs.h> | 31 | #include <linux/vfs.h> |
@@ -41,6 +40,9 @@ | |||
41 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
42 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
43 | 42 | ||
43 | #include <linux/kthread.h> | ||
44 | #include <linux/freezer.h> | ||
45 | |||
44 | #include "ext4.h" | 46 | #include "ext4.h" |
45 | #include "ext4_jbd2.h" | 47 | #include "ext4_jbd2.h" |
46 | #include "xattr.h" | 48 | #include "xattr.h" |
@@ -50,8 +52,11 @@ | |||
50 | #define CREATE_TRACE_POINTS | 52 | #define CREATE_TRACE_POINTS |
51 | #include <trace/events/ext4.h> | 53 | #include <trace/events/ext4.h> |
52 | 54 | ||
53 | struct proc_dir_entry *ext4_proc_root; | 55 | static struct proc_dir_entry *ext4_proc_root; |
54 | static struct kset *ext4_kset; | 56 | static struct kset *ext4_kset; |
57 | struct ext4_lazy_init *ext4_li_info; | ||
58 | struct mutex ext4_li_mtx; | ||
59 | struct ext4_features *ext4_feat; | ||
55 | 60 | ||
56 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
57 | unsigned long journal_devnum); | 62 | unsigned long journal_devnum); |
@@ -70,6 +75,8 @@ static void ext4_write_super(struct super_block *sb); | |||
70 | static int ext4_freeze(struct super_block *sb); | 75 | static int ext4_freeze(struct super_block *sb); |
71 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, | 76 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, |
72 | const char *dev_name, void *data, struct vfsmount *mnt); | 77 | const char *dev_name, void *data, struct vfsmount *mnt); |
78 | static void ext4_destroy_lazyinit_thread(void); | ||
79 | static void ext4_unregister_li_request(struct super_block *sb); | ||
73 | 80 | ||
74 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 81 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
75 | static struct file_system_type ext3_fs_type = { | 82 | static struct file_system_type ext3_fs_type = { |
@@ -241,14 +248,14 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
241 | if (sb->s_flags & MS_RDONLY) | 248 | if (sb->s_flags & MS_RDONLY) |
242 | return ERR_PTR(-EROFS); | 249 | return ERR_PTR(-EROFS); |
243 | 250 | ||
244 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | 251 | vfs_check_frozen(sb, SB_FREEZE_TRANS); |
245 | /* Special case here: if the journal has aborted behind our | 252 | /* Special case here: if the journal has aborted behind our |
246 | * backs (eg. EIO in the commit thread), then we still need to | 253 | * backs (eg. EIO in the commit thread), then we still need to |
247 | * take the FS itself readonly cleanly. */ | 254 | * take the FS itself readonly cleanly. */ |
248 | journal = EXT4_SB(sb)->s_journal; | 255 | journal = EXT4_SB(sb)->s_journal; |
249 | if (journal) { | 256 | if (journal) { |
250 | if (is_journal_aborted(journal)) { | 257 | if (is_journal_aborted(journal)) { |
251 | ext4_abort(sb, __func__, "Detected aborted journal"); | 258 | ext4_abort(sb, "Detected aborted journal"); |
252 | return ERR_PTR(-EROFS); | 259 | return ERR_PTR(-EROFS); |
253 | } | 260 | } |
254 | return jbd2_journal_start(journal, nblocks); | 261 | return jbd2_journal_start(journal, nblocks); |
@@ -262,7 +269,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
262 | * that sync() will call the filesystem's write_super callback if | 269 | * that sync() will call the filesystem's write_super callback if |
263 | * appropriate. | 270 | * appropriate. |
264 | */ | 271 | */ |
265 | int __ext4_journal_stop(const char *where, handle_t *handle) | 272 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) |
266 | { | 273 | { |
267 | struct super_block *sb; | 274 | struct super_block *sb; |
268 | int err; | 275 | int err; |
@@ -279,12 +286,13 @@ int __ext4_journal_stop(const char *where, handle_t *handle) | |||
279 | if (!err) | 286 | if (!err) |
280 | err = rc; | 287 | err = rc; |
281 | if (err) | 288 | if (err) |
282 | __ext4_std_error(sb, where, err); | 289 | __ext4_std_error(sb, where, line, err); |
283 | return err; | 290 | return err; |
284 | } | 291 | } |
285 | 292 | ||
286 | void ext4_journal_abort_handle(const char *caller, const char *err_fn, | 293 | void ext4_journal_abort_handle(const char *caller, unsigned int line, |
287 | struct buffer_head *bh, handle_t *handle, int err) | 294 | const char *err_fn, struct buffer_head *bh, |
295 | handle_t *handle, int err) | ||
288 | { | 296 | { |
289 | char nbuf[16]; | 297 | char nbuf[16]; |
290 | const char *errstr = ext4_decode_error(NULL, err, nbuf); | 298 | const char *errstr = ext4_decode_error(NULL, err, nbuf); |
@@ -300,12 +308,47 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, | |||
300 | if (is_handle_aborted(handle)) | 308 | if (is_handle_aborted(handle)) |
301 | return; | 309 | return; |
302 | 310 | ||
303 | printk(KERN_ERR "%s: aborting transaction: %s in %s\n", | 311 | printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", |
304 | caller, errstr, err_fn); | 312 | caller, line, errstr, err_fn); |
305 | 313 | ||
306 | jbd2_journal_abort_handle(handle); | 314 | jbd2_journal_abort_handle(handle); |
307 | } | 315 | } |
308 | 316 | ||
317 | static void __save_error_info(struct super_block *sb, const char *func, | ||
318 | unsigned int line) | ||
319 | { | ||
320 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
321 | |||
322 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
323 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
324 | es->s_last_error_time = cpu_to_le32(get_seconds()); | ||
325 | strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); | ||
326 | es->s_last_error_line = cpu_to_le32(line); | ||
327 | if (!es->s_first_error_time) { | ||
328 | es->s_first_error_time = es->s_last_error_time; | ||
329 | strncpy(es->s_first_error_func, func, | ||
330 | sizeof(es->s_first_error_func)); | ||
331 | es->s_first_error_line = cpu_to_le32(line); | ||
332 | es->s_first_error_ino = es->s_last_error_ino; | ||
333 | es->s_first_error_block = es->s_last_error_block; | ||
334 | } | ||
335 | /* | ||
336 | * Start the daily error reporting function if it hasn't been | ||
337 | * started already | ||
338 | */ | ||
339 | if (!es->s_error_count) | ||
340 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); | ||
341 | es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); | ||
342 | } | ||
343 | |||
344 | static void save_error_info(struct super_block *sb, const char *func, | ||
345 | unsigned int line) | ||
346 | { | ||
347 | __save_error_info(sb, func, line); | ||
348 | ext4_commit_super(sb, 1); | ||
349 | } | ||
350 | |||
351 | |||
309 | /* Deal with the reporting of failure conditions on a filesystem such as | 352 | /* Deal with the reporting of failure conditions on a filesystem such as |
310 | * inconsistencies detected or read IO failures. | 353 | * inconsistencies detected or read IO failures. |
311 | * | 354 | * |
@@ -323,11 +366,6 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, | |||
323 | 366 | ||
324 | static void ext4_handle_error(struct super_block *sb) | 367 | static void ext4_handle_error(struct super_block *sb) |
325 | { | 368 | { |
326 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
327 | |||
328 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
329 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
330 | |||
331 | if (sb->s_flags & MS_RDONLY) | 369 | if (sb->s_flags & MS_RDONLY) |
332 | return; | 370 | return; |
333 | 371 | ||
@@ -342,19 +380,19 @@ static void ext4_handle_error(struct super_block *sb) | |||
342 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 380 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
343 | sb->s_flags |= MS_RDONLY; | 381 | sb->s_flags |= MS_RDONLY; |
344 | } | 382 | } |
345 | ext4_commit_super(sb, 1); | ||
346 | if (test_opt(sb, ERRORS_PANIC)) | 383 | if (test_opt(sb, ERRORS_PANIC)) |
347 | panic("EXT4-fs (device %s): panic forced after error\n", | 384 | panic("EXT4-fs (device %s): panic forced after error\n", |
348 | sb->s_id); | 385 | sb->s_id); |
349 | } | 386 | } |
350 | 387 | ||
351 | void __ext4_error(struct super_block *sb, const char *function, | 388 | void __ext4_error(struct super_block *sb, const char *function, |
352 | const char *fmt, ...) | 389 | unsigned int line, const char *fmt, ...) |
353 | { | 390 | { |
354 | va_list args; | 391 | va_list args; |
355 | 392 | ||
356 | va_start(args, fmt); | 393 | va_start(args, fmt); |
357 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 394 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ", |
395 | sb->s_id, function, line, current->comm); | ||
358 | vprintk(fmt, args); | 396 | vprintk(fmt, args); |
359 | printk("\n"); | 397 | printk("\n"); |
360 | va_end(args); | 398 | va_end(args); |
@@ -362,14 +400,22 @@ void __ext4_error(struct super_block *sb, const char *function, | |||
362 | ext4_handle_error(sb); | 400 | ext4_handle_error(sb); |
363 | } | 401 | } |
364 | 402 | ||
365 | void ext4_error_inode(const char *function, struct inode *inode, | 403 | void ext4_error_inode(struct inode *inode, const char *function, |
404 | unsigned int line, ext4_fsblk_t block, | ||
366 | const char *fmt, ...) | 405 | const char *fmt, ...) |
367 | { | 406 | { |
368 | va_list args; | 407 | va_list args; |
408 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
369 | 409 | ||
410 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | ||
411 | es->s_last_error_block = cpu_to_le64(block); | ||
412 | save_error_info(inode->i_sb, function, line); | ||
370 | va_start(args, fmt); | 413 | va_start(args, fmt); |
371 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", | 414 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", |
372 | inode->i_sb->s_id, function, inode->i_ino, current->comm); | 415 | inode->i_sb->s_id, function, line, inode->i_ino); |
416 | if (block) | ||
417 | printk("block %llu: ", block); | ||
418 | printk("comm %s: ", current->comm); | ||
373 | vprintk(fmt, args); | 419 | vprintk(fmt, args); |
374 | printk("\n"); | 420 | printk("\n"); |
375 | va_end(args); | 421 | va_end(args); |
@@ -377,20 +423,26 @@ void ext4_error_inode(const char *function, struct inode *inode, | |||
377 | ext4_handle_error(inode->i_sb); | 423 | ext4_handle_error(inode->i_sb); |
378 | } | 424 | } |
379 | 425 | ||
380 | void ext4_error_file(const char *function, struct file *file, | 426 | void ext4_error_file(struct file *file, const char *function, |
381 | const char *fmt, ...) | 427 | unsigned int line, const char *fmt, ...) |
382 | { | 428 | { |
383 | va_list args; | 429 | va_list args; |
430 | struct ext4_super_block *es; | ||
384 | struct inode *inode = file->f_dentry->d_inode; | 431 | struct inode *inode = file->f_dentry->d_inode; |
385 | char pathname[80], *path; | 432 | char pathname[80], *path; |
386 | 433 | ||
434 | es = EXT4_SB(inode->i_sb)->s_es; | ||
435 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | ||
436 | save_error_info(inode->i_sb, function, line); | ||
387 | va_start(args, fmt); | 437 | va_start(args, fmt); |
388 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); | 438 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); |
389 | if (!path) | 439 | if (!path) |
390 | path = "(unknown)"; | 440 | path = "(unknown)"; |
391 | printk(KERN_CRIT | 441 | printk(KERN_CRIT |
392 | "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", | 442 | "EXT4-fs error (device %s): %s:%d: inode #%lu " |
393 | inode->i_sb->s_id, function, inode->i_ino, current->comm, path); | 443 | "(comm %s path %s): ", |
444 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
445 | current->comm, path); | ||
394 | vprintk(fmt, args); | 446 | vprintk(fmt, args); |
395 | printk("\n"); | 447 | printk("\n"); |
396 | va_end(args); | 448 | va_end(args); |
@@ -435,7 +487,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, | |||
435 | /* __ext4_std_error decodes expected errors from journaling functions | 487 | /* __ext4_std_error decodes expected errors from journaling functions |
436 | * automatically and invokes the appropriate error response. */ | 488 | * automatically and invokes the appropriate error response. */ |
437 | 489 | ||
438 | void __ext4_std_error(struct super_block *sb, const char *function, int errno) | 490 | void __ext4_std_error(struct super_block *sb, const char *function, |
491 | unsigned int line, int errno) | ||
439 | { | 492 | { |
440 | char nbuf[16]; | 493 | char nbuf[16]; |
441 | const char *errstr; | 494 | const char *errstr; |
@@ -448,8 +501,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno) | |||
448 | return; | 501 | return; |
449 | 502 | ||
450 | errstr = ext4_decode_error(sb, errno, nbuf); | 503 | errstr = ext4_decode_error(sb, errno, nbuf); |
451 | printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", | 504 | printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", |
452 | sb->s_id, function, errstr); | 505 | sb->s_id, function, line, errstr); |
506 | save_error_info(sb, function, line); | ||
453 | 507 | ||
454 | ext4_handle_error(sb); | 508 | ext4_handle_error(sb); |
455 | } | 509 | } |
@@ -464,29 +518,29 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno) | |||
464 | * case we take the easy way out and panic immediately. | 518 | * case we take the easy way out and panic immediately. |
465 | */ | 519 | */ |
466 | 520 | ||
467 | void ext4_abort(struct super_block *sb, const char *function, | 521 | void __ext4_abort(struct super_block *sb, const char *function, |
468 | const char *fmt, ...) | 522 | unsigned int line, const char *fmt, ...) |
469 | { | 523 | { |
470 | va_list args; | 524 | va_list args; |
471 | 525 | ||
526 | save_error_info(sb, function, line); | ||
472 | va_start(args, fmt); | 527 | va_start(args, fmt); |
473 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 528 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, |
529 | function, line); | ||
474 | vprintk(fmt, args); | 530 | vprintk(fmt, args); |
475 | printk("\n"); | 531 | printk("\n"); |
476 | va_end(args); | 532 | va_end(args); |
477 | 533 | ||
534 | if ((sb->s_flags & MS_RDONLY) == 0) { | ||
535 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | ||
536 | sb->s_flags |= MS_RDONLY; | ||
537 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | ||
538 | if (EXT4_SB(sb)->s_journal) | ||
539 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | ||
540 | save_error_info(sb, function, line); | ||
541 | } | ||
478 | if (test_opt(sb, ERRORS_PANIC)) | 542 | if (test_opt(sb, ERRORS_PANIC)) |
479 | panic("EXT4-fs panic from previous error\n"); | 543 | panic("EXT4-fs panic from previous error\n"); |
480 | |||
481 | if (sb->s_flags & MS_RDONLY) | ||
482 | return; | ||
483 | |||
484 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | ||
485 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
486 | sb->s_flags |= MS_RDONLY; | ||
487 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | ||
488 | if (EXT4_SB(sb)->s_journal) | ||
489 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | ||
490 | } | 544 | } |
491 | 545 | ||
492 | void ext4_msg (struct super_block * sb, const char *prefix, | 546 | void ext4_msg (struct super_block * sb, const char *prefix, |
@@ -502,38 +556,47 @@ void ext4_msg (struct super_block * sb, const char *prefix, | |||
502 | } | 556 | } |
503 | 557 | ||
504 | void __ext4_warning(struct super_block *sb, const char *function, | 558 | void __ext4_warning(struct super_block *sb, const char *function, |
505 | const char *fmt, ...) | 559 | unsigned int line, const char *fmt, ...) |
506 | { | 560 | { |
507 | va_list args; | 561 | va_list args; |
508 | 562 | ||
509 | va_start(args, fmt); | 563 | va_start(args, fmt); |
510 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", | 564 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ", |
511 | sb->s_id, function); | 565 | sb->s_id, function, line); |
512 | vprintk(fmt, args); | 566 | vprintk(fmt, args); |
513 | printk("\n"); | 567 | printk("\n"); |
514 | va_end(args); | 568 | va_end(args); |
515 | } | 569 | } |
516 | 570 | ||
517 | void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, | 571 | void __ext4_grp_locked_error(const char *function, unsigned int line, |
518 | const char *function, const char *fmt, ...) | 572 | struct super_block *sb, ext4_group_t grp, |
573 | unsigned long ino, ext4_fsblk_t block, | ||
574 | const char *fmt, ...) | ||
519 | __releases(bitlock) | 575 | __releases(bitlock) |
520 | __acquires(bitlock) | 576 | __acquires(bitlock) |
521 | { | 577 | { |
522 | va_list args; | 578 | va_list args; |
523 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 579 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
524 | 580 | ||
581 | es->s_last_error_ino = cpu_to_le32(ino); | ||
582 | es->s_last_error_block = cpu_to_le64(block); | ||
583 | __save_error_info(sb, function, line); | ||
525 | va_start(args, fmt); | 584 | va_start(args, fmt); |
526 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 585 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", |
586 | sb->s_id, function, line, grp); | ||
587 | if (ino) | ||
588 | printk("inode %lu: ", ino); | ||
589 | if (block) | ||
590 | printk("block %llu:", (unsigned long long) block); | ||
527 | vprintk(fmt, args); | 591 | vprintk(fmt, args); |
528 | printk("\n"); | 592 | printk("\n"); |
529 | va_end(args); | 593 | va_end(args); |
530 | 594 | ||
531 | if (test_opt(sb, ERRORS_CONT)) { | 595 | if (test_opt(sb, ERRORS_CONT)) { |
532 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
533 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
534 | ext4_commit_super(sb, 0); | 596 | ext4_commit_super(sb, 0); |
535 | return; | 597 | return; |
536 | } | 598 | } |
599 | |||
537 | ext4_unlock_group(sb, grp); | 600 | ext4_unlock_group(sb, grp); |
538 | ext4_handle_error(sb); | 601 | ext4_handle_error(sb); |
539 | /* | 602 | /* |
@@ -646,13 +709,13 @@ static void ext4_put_super(struct super_block *sb) | |||
646 | struct ext4_super_block *es = sbi->s_es; | 709 | struct ext4_super_block *es = sbi->s_es; |
647 | int i, err; | 710 | int i, err; |
648 | 711 | ||
712 | ext4_unregister_li_request(sb); | ||
649 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 713 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
650 | 714 | ||
651 | flush_workqueue(sbi->dio_unwritten_wq); | 715 | flush_workqueue(sbi->dio_unwritten_wq); |
652 | destroy_workqueue(sbi->dio_unwritten_wq); | 716 | destroy_workqueue(sbi->dio_unwritten_wq); |
653 | 717 | ||
654 | lock_super(sb); | 718 | lock_super(sb); |
655 | lock_kernel(); | ||
656 | if (sb->s_dirt) | 719 | if (sb->s_dirt) |
657 | ext4_commit_super(sb, 1); | 720 | ext4_commit_super(sb, 1); |
658 | 721 | ||
@@ -660,10 +723,10 @@ static void ext4_put_super(struct super_block *sb) | |||
660 | err = jbd2_journal_destroy(sbi->s_journal); | 723 | err = jbd2_journal_destroy(sbi->s_journal); |
661 | sbi->s_journal = NULL; | 724 | sbi->s_journal = NULL; |
662 | if (err < 0) | 725 | if (err < 0) |
663 | ext4_abort(sb, __func__, | 726 | ext4_abort(sb, "Couldn't clean up the journal"); |
664 | "Couldn't clean up the journal"); | ||
665 | } | 727 | } |
666 | 728 | ||
729 | del_timer(&sbi->s_err_report); | ||
667 | ext4_release_system_zone(sb); | 730 | ext4_release_system_zone(sb); |
668 | ext4_mb_release(sb); | 731 | ext4_mb_release(sb); |
669 | ext4_ext_release(sb); | 732 | ext4_ext_release(sb); |
@@ -720,7 +783,6 @@ static void ext4_put_super(struct super_block *sb) | |||
720 | * Now that we are completely done shutting down the | 783 | * Now that we are completely done shutting down the |
721 | * superblock, we need to actually destroy the kobject. | 784 | * superblock, we need to actually destroy the kobject. |
722 | */ | 785 | */ |
723 | unlock_kernel(); | ||
724 | unlock_super(sb); | 786 | unlock_super(sb); |
725 | kobject_put(&sbi->s_kobj); | 787 | kobject_put(&sbi->s_kobj); |
726 | wait_for_completion(&sbi->s_kobj_unregister); | 788 | wait_for_completion(&sbi->s_kobj_unregister); |
@@ -813,8 +875,10 @@ static void destroy_inodecache(void) | |||
813 | kmem_cache_destroy(ext4_inode_cachep); | 875 | kmem_cache_destroy(ext4_inode_cachep); |
814 | } | 876 | } |
815 | 877 | ||
816 | static void ext4_clear_inode(struct inode *inode) | 878 | void ext4_clear_inode(struct inode *inode) |
817 | { | 879 | { |
880 | invalidate_inode_buffers(inode); | ||
881 | end_writeback(inode); | ||
818 | dquot_drop(inode); | 882 | dquot_drop(inode); |
819 | ext4_discard_preallocations(inode); | 883 | ext4_discard_preallocations(inode); |
820 | if (EXT4_JOURNAL(inode)) | 884 | if (EXT4_JOURNAL(inode)) |
@@ -946,14 +1010,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
946 | seq_puts(seq, ",journal_async_commit"); | 1010 | seq_puts(seq, ",journal_async_commit"); |
947 | else if (test_opt(sb, JOURNAL_CHECKSUM)) | 1011 | else if (test_opt(sb, JOURNAL_CHECKSUM)) |
948 | seq_puts(seq, ",journal_checksum"); | 1012 | seq_puts(seq, ",journal_checksum"); |
949 | if (test_opt(sb, NOBH)) | ||
950 | seq_puts(seq, ",nobh"); | ||
951 | if (test_opt(sb, I_VERSION)) | 1013 | if (test_opt(sb, I_VERSION)) |
952 | seq_puts(seq, ",i_version"); | 1014 | seq_puts(seq, ",i_version"); |
953 | if (!test_opt(sb, DELALLOC)) | 1015 | if (!test_opt(sb, DELALLOC) && |
1016 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | ||
954 | seq_puts(seq, ",nodelalloc"); | 1017 | seq_puts(seq, ",nodelalloc"); |
955 | 1018 | ||
956 | |||
957 | if (sbi->s_stripe) | 1019 | if (sbi->s_stripe) |
958 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 1020 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
959 | /* | 1021 | /* |
@@ -977,7 +1039,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
977 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) | 1039 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) |
978 | seq_puts(seq, ",noauto_da_alloc"); | 1040 | seq_puts(seq, ",noauto_da_alloc"); |
979 | 1041 | ||
980 | if (test_opt(sb, DISCARD)) | 1042 | if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) |
981 | seq_puts(seq, ",discard"); | 1043 | seq_puts(seq, ",discard"); |
982 | 1044 | ||
983 | if (test_opt(sb, NOLOAD)) | 1045 | if (test_opt(sb, NOLOAD)) |
@@ -986,6 +1048,16 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
986 | if (test_opt(sb, DIOREAD_NOLOCK)) | 1048 | if (test_opt(sb, DIOREAD_NOLOCK)) |
987 | seq_puts(seq, ",dioread_nolock"); | 1049 | seq_puts(seq, ",dioread_nolock"); |
988 | 1050 | ||
1051 | if (test_opt(sb, BLOCK_VALIDITY) && | ||
1052 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) | ||
1053 | seq_puts(seq, ",block_validity"); | ||
1054 | |||
1055 | if (!test_opt(sb, INIT_INODE_TABLE)) | ||
1056 | seq_puts(seq, ",noinit_inode_table"); | ||
1057 | else if (sbi->s_li_wait_mult) | ||
1058 | seq_printf(seq, ",init_inode_table=%u", | ||
1059 | (unsigned) sbi->s_li_wait_mult); | ||
1060 | |||
989 | ext4_show_quota_options(seq, sb); | 1061 | ext4_show_quota_options(seq, sb); |
990 | 1062 | ||
991 | return 0; | 1063 | return 0; |
@@ -1065,6 +1137,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); | |||
1065 | static int ext4_write_info(struct super_block *sb, int type); | 1137 | static int ext4_write_info(struct super_block *sb, int type); |
1066 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 1138 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
1067 | char *path); | 1139 | char *path); |
1140 | static int ext4_quota_off(struct super_block *sb, int type); | ||
1068 | static int ext4_quota_on_mount(struct super_block *sb, int type); | 1141 | static int ext4_quota_on_mount(struct super_block *sb, int type); |
1069 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 1142 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
1070 | size_t len, loff_t off); | 1143 | size_t len, loff_t off); |
@@ -1086,7 +1159,7 @@ static const struct dquot_operations ext4_quota_operations = { | |||
1086 | 1159 | ||
1087 | static const struct quotactl_ops ext4_qctl_operations = { | 1160 | static const struct quotactl_ops ext4_qctl_operations = { |
1088 | .quota_on = ext4_quota_on, | 1161 | .quota_on = ext4_quota_on, |
1089 | .quota_off = dquot_quota_off, | 1162 | .quota_off = ext4_quota_off, |
1090 | .quota_sync = dquot_quota_sync, | 1163 | .quota_sync = dquot_quota_sync, |
1091 | .get_info = dquot_get_dqinfo, | 1164 | .get_info = dquot_get_dqinfo, |
1092 | .set_info = dquot_set_dqinfo, | 1165 | .set_info = dquot_set_dqinfo, |
@@ -1100,20 +1173,20 @@ static const struct super_operations ext4_sops = { | |||
1100 | .destroy_inode = ext4_destroy_inode, | 1173 | .destroy_inode = ext4_destroy_inode, |
1101 | .write_inode = ext4_write_inode, | 1174 | .write_inode = ext4_write_inode, |
1102 | .dirty_inode = ext4_dirty_inode, | 1175 | .dirty_inode = ext4_dirty_inode, |
1103 | .delete_inode = ext4_delete_inode, | 1176 | .evict_inode = ext4_evict_inode, |
1104 | .put_super = ext4_put_super, | 1177 | .put_super = ext4_put_super, |
1105 | .sync_fs = ext4_sync_fs, | 1178 | .sync_fs = ext4_sync_fs, |
1106 | .freeze_fs = ext4_freeze, | 1179 | .freeze_fs = ext4_freeze, |
1107 | .unfreeze_fs = ext4_unfreeze, | 1180 | .unfreeze_fs = ext4_unfreeze, |
1108 | .statfs = ext4_statfs, | 1181 | .statfs = ext4_statfs, |
1109 | .remount_fs = ext4_remount, | 1182 | .remount_fs = ext4_remount, |
1110 | .clear_inode = ext4_clear_inode, | ||
1111 | .show_options = ext4_show_options, | 1183 | .show_options = ext4_show_options, |
1112 | #ifdef CONFIG_QUOTA | 1184 | #ifdef CONFIG_QUOTA |
1113 | .quota_read = ext4_quota_read, | 1185 | .quota_read = ext4_quota_read, |
1114 | .quota_write = ext4_quota_write, | 1186 | .quota_write = ext4_quota_write, |
1115 | #endif | 1187 | #endif |
1116 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1188 | .bdev_try_to_free_page = bdev_try_to_free_page, |
1189 | .trim_fs = ext4_trim_fs | ||
1117 | }; | 1190 | }; |
1118 | 1191 | ||
1119 | static const struct super_operations ext4_nojournal_sops = { | 1192 | static const struct super_operations ext4_nojournal_sops = { |
@@ -1121,12 +1194,11 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1121 | .destroy_inode = ext4_destroy_inode, | 1194 | .destroy_inode = ext4_destroy_inode, |
1122 | .write_inode = ext4_write_inode, | 1195 | .write_inode = ext4_write_inode, |
1123 | .dirty_inode = ext4_dirty_inode, | 1196 | .dirty_inode = ext4_dirty_inode, |
1124 | .delete_inode = ext4_delete_inode, | 1197 | .evict_inode = ext4_evict_inode, |
1125 | .write_super = ext4_write_super, | 1198 | .write_super = ext4_write_super, |
1126 | .put_super = ext4_put_super, | 1199 | .put_super = ext4_put_super, |
1127 | .statfs = ext4_statfs, | 1200 | .statfs = ext4_statfs, |
1128 | .remount_fs = ext4_remount, | 1201 | .remount_fs = ext4_remount, |
1129 | .clear_inode = ext4_clear_inode, | ||
1130 | .show_options = ext4_show_options, | 1202 | .show_options = ext4_show_options, |
1131 | #ifdef CONFIG_QUOTA | 1203 | #ifdef CONFIG_QUOTA |
1132 | .quota_read = ext4_quota_read, | 1204 | .quota_read = ext4_quota_read, |
@@ -1161,6 +1233,7 @@ enum { | |||
1161 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1233 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1162 | Opt_dioread_nolock, Opt_dioread_lock, | 1234 | Opt_dioread_nolock, Opt_dioread_lock, |
1163 | Opt_discard, Opt_nodiscard, | 1235 | Opt_discard, Opt_nodiscard, |
1236 | Opt_init_inode_table, Opt_noinit_inode_table, | ||
1164 | }; | 1237 | }; |
1165 | 1238 | ||
1166 | static const match_table_t tokens = { | 1239 | static const match_table_t tokens = { |
@@ -1231,6 +1304,9 @@ static const match_table_t tokens = { | |||
1231 | {Opt_dioread_lock, "dioread_lock"}, | 1304 | {Opt_dioread_lock, "dioread_lock"}, |
1232 | {Opt_discard, "discard"}, | 1305 | {Opt_discard, "discard"}, |
1233 | {Opt_nodiscard, "nodiscard"}, | 1306 | {Opt_nodiscard, "nodiscard"}, |
1307 | {Opt_init_inode_table, "init_itable=%u"}, | ||
1308 | {Opt_init_inode_table, "init_itable"}, | ||
1309 | {Opt_noinit_inode_table, "noinit_itable"}, | ||
1234 | {Opt_err, NULL}, | 1310 | {Opt_err, NULL}, |
1235 | }; | 1311 | }; |
1236 | 1312 | ||
@@ -1624,10 +1700,12 @@ set_qf_format: | |||
1624 | *n_blocks_count = option; | 1700 | *n_blocks_count = option; |
1625 | break; | 1701 | break; |
1626 | case Opt_nobh: | 1702 | case Opt_nobh: |
1627 | set_opt(sbi->s_mount_opt, NOBH); | 1703 | ext4_msg(sb, KERN_WARNING, |
1704 | "Ignoring deprecated nobh option"); | ||
1628 | break; | 1705 | break; |
1629 | case Opt_bh: | 1706 | case Opt_bh: |
1630 | clear_opt(sbi->s_mount_opt, NOBH); | 1707 | ext4_msg(sb, KERN_WARNING, |
1708 | "Ignoring deprecated bh option"); | ||
1631 | break; | 1709 | break; |
1632 | case Opt_i_version: | 1710 | case Opt_i_version: |
1633 | set_opt(sbi->s_mount_opt, I_VERSION); | 1711 | set_opt(sbi->s_mount_opt, I_VERSION); |
@@ -1699,6 +1777,20 @@ set_qf_format: | |||
1699 | case Opt_dioread_lock: | 1777 | case Opt_dioread_lock: |
1700 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | 1778 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); |
1701 | break; | 1779 | break; |
1780 | case Opt_init_inode_table: | ||
1781 | set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
1782 | if (args[0].from) { | ||
1783 | if (match_int(&args[0], &option)) | ||
1784 | return 0; | ||
1785 | } else | ||
1786 | option = EXT4_DEF_LI_WAIT_MULT; | ||
1787 | if (option < 0) | ||
1788 | return 0; | ||
1789 | sbi->s_li_wait_mult = option; | ||
1790 | break; | ||
1791 | case Opt_noinit_inode_table: | ||
1792 | clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
1793 | break; | ||
1702 | default: | 1794 | default: |
1703 | ext4_msg(sb, KERN_ERR, | 1795 | ext4_msg(sb, KERN_ERR, |
1704 | "Unrecognized mount option \"%s\" " | 1796 | "Unrecognized mount option \"%s\" " |
@@ -1882,7 +1974,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, | |||
1882 | } | 1974 | } |
1883 | 1975 | ||
1884 | /* Called at mount-time, super-block is locked */ | 1976 | /* Called at mount-time, super-block is locked */ |
1885 | static int ext4_check_descriptors(struct super_block *sb) | 1977 | static int ext4_check_descriptors(struct super_block *sb, |
1978 | ext4_group_t *first_not_zeroed) | ||
1886 | { | 1979 | { |
1887 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1980 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1888 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); | 1981 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); |
@@ -1891,7 +1984,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1891 | ext4_fsblk_t inode_bitmap; | 1984 | ext4_fsblk_t inode_bitmap; |
1892 | ext4_fsblk_t inode_table; | 1985 | ext4_fsblk_t inode_table; |
1893 | int flexbg_flag = 0; | 1986 | int flexbg_flag = 0; |
1894 | ext4_group_t i; | 1987 | ext4_group_t i, grp = sbi->s_groups_count; |
1895 | 1988 | ||
1896 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1989 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1897 | flexbg_flag = 1; | 1990 | flexbg_flag = 1; |
@@ -1907,6 +2000,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1907 | last_block = first_block + | 2000 | last_block = first_block + |
1908 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2001 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); |
1909 | 2002 | ||
2003 | if ((grp == sbi->s_groups_count) && | ||
2004 | !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2005 | grp = i; | ||
2006 | |||
1910 | block_bitmap = ext4_block_bitmap(sb, gdp); | 2007 | block_bitmap = ext4_block_bitmap(sb, gdp); |
1911 | if (block_bitmap < first_block || block_bitmap > last_block) { | 2008 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1912 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2009 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
@@ -1944,6 +2041,8 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1944 | if (!flexbg_flag) | 2041 | if (!flexbg_flag) |
1945 | first_block += EXT4_BLOCKS_PER_GROUP(sb); | 2042 | first_block += EXT4_BLOCKS_PER_GROUP(sb); |
1946 | } | 2043 | } |
2044 | if (NULL != first_not_zeroed) | ||
2045 | *first_not_zeroed = grp; | ||
1947 | 2046 | ||
1948 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 2047 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); |
1949 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); | 2048 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
@@ -2249,6 +2348,8 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a, | |||
2249 | { | 2348 | { |
2250 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2349 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
2251 | 2350 | ||
2351 | if (!sb->s_bdev->bd_part) | ||
2352 | return snprintf(buf, PAGE_SIZE, "0\n"); | ||
2252 | return snprintf(buf, PAGE_SIZE, "%lu\n", | 2353 | return snprintf(buf, PAGE_SIZE, "%lu\n", |
2253 | (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2354 | (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
2254 | sbi->s_sectors_written_start) >> 1); | 2355 | sbi->s_sectors_written_start) >> 1); |
@@ -2259,6 +2360,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, | |||
2259 | { | 2360 | { |
2260 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2361 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
2261 | 2362 | ||
2363 | if (!sb->s_bdev->bd_part) | ||
2364 | return snprintf(buf, PAGE_SIZE, "0\n"); | ||
2262 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2365 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
2263 | (unsigned long long)(sbi->s_kbytes_written + | 2366 | (unsigned long long)(sbi->s_kbytes_written + |
2264 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2367 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
@@ -2312,6 +2415,7 @@ static struct ext4_attr ext4_attr_##_name = { \ | |||
2312 | #define EXT4_ATTR(name, mode, show, store) \ | 2415 | #define EXT4_ATTR(name, mode, show, store) \ |
2313 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2416 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
2314 | 2417 | ||
2418 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) | ||
2315 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) | 2419 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) |
2316 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) | 2420 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) |
2317 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2421 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
@@ -2348,6 +2452,16 @@ static struct attribute *ext4_attrs[] = { | |||
2348 | NULL, | 2452 | NULL, |
2349 | }; | 2453 | }; |
2350 | 2454 | ||
/* Features this copy of ext4 supports */
/*
 * EXT4_INFO_ATTR creates a 0444 attribute with no show/store callback:
 * the mere presence of the file in sysfs advertises the feature.
 */
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);

/* Attribute list for the feature-advertisement kobject directory. */
static struct attribute *ext4_feat_attrs[] = {
	ATTR_LIST(lazy_itable_init),
	ATTR_LIST(batched_discard),
	NULL,
};
2464 | |||
2351 | static ssize_t ext4_attr_show(struct kobject *kobj, | 2465 | static ssize_t ext4_attr_show(struct kobject *kobj, |
2352 | struct attribute *attr, char *buf) | 2466 | struct attribute *attr, char *buf) |
2353 | { | 2467 | { |
@@ -2376,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj) | |||
2376 | complete(&sbi->s_kobj_unregister); | 2490 | complete(&sbi->s_kobj_unregister); |
2377 | } | 2491 | } |
2378 | 2492 | ||
2379 | |||
2380 | static const struct sysfs_ops ext4_attr_ops = { | 2493 | static const struct sysfs_ops ext4_attr_ops = { |
2381 | .show = ext4_attr_show, | 2494 | .show = ext4_attr_show, |
2382 | .store = ext4_attr_store, | 2495 | .store = ext4_attr_store, |
@@ -2388,6 +2501,17 @@ static struct kobj_type ext4_ktype = { | |||
2388 | .release = ext4_sb_release, | 2501 | .release = ext4_sb_release, |
2389 | }; | 2502 | }; |
2390 | 2503 | ||
/* kobject release hook for the global ext4 feature kobject (ext4_feat). */
static void ext4_feat_release(struct kobject *kobj)
{
	complete(&ext4_feat->f_kobj_unregister);
}
2508 | |||
/* kobject type describing the feature-advertisement sysfs directory. */
static struct kobj_type ext4_feat_ktype = {
	.default_attrs	= ext4_feat_attrs,
	.sysfs_ops	= &ext4_attr_ops,
	.release	= ext4_feat_release,
};
2514 | |||
2391 | /* | 2515 | /* |
2392 | * Check whether this filesystem can be mounted based on | 2516 | * Check whether this filesystem can be mounted based on |
2393 | * the features present and the RDONLY/RDWR mount requested. | 2517 | * the features present and the RDONLY/RDWR mount requested. |
@@ -2431,6 +2555,419 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) | |||
2431 | return 1; | 2555 | return 1; |
2432 | } | 2556 | } |
2433 | 2557 | ||
/*
 * This function is called once a day if we have errors logged
 * on the file system.
 *
 * Timer callback: @arg is the super_block whose on-disk error log is
 * reported.  Prints the error count plus the first/last recorded error
 * (function, line, and optionally inode/block), then re-arms itself.
 * The bare printk() calls continue the preceding KERN_NOTICE line.
 */
static void print_daily_error_info(unsigned long arg)
{
	struct super_block *sb = (struct super_block *) arg;
	struct ext4_sb_info *sbi;
	struct ext4_super_block *es;

	sbi = EXT4_SB(sb);
	es = sbi->s_es;

	if (es->s_error_count)
		ext4_msg(sb, KERN_NOTICE, "error count: %u",
			 le32_to_cpu(es->s_error_count));
	if (es->s_first_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_first_error_time),
		       (int) sizeof(es->s_first_error_func),
		       es->s_first_error_func,
		       le32_to_cpu(es->s_first_error_line));
		if (es->s_first_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_first_error_ino));
		if (es->s_first_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_first_error_block));
		printk("\n");
	}
	if (es->s_last_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_last_error_time),
		       (int) sizeof(es->s_last_error_func),
		       es->s_last_error_func,
		       le32_to_cpu(es->s_last_error_line));
		if (es->s_last_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_last_error_ino));
		if (es->s_last_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_last_error_block));
		printk("\n");
	}
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
}
2604 | |||
/* Timer callback: @data carries the ext4lazyinit task to be woken. */
static void ext4_lazyinode_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2610 | |||
/*
 * Find next suitable group and run ext4_init_inode_table on it.
 *
 * Returns 0 when a group was processed (updating the request's next
 * group and next scheduled time), or nonzero when there is no further
 * work / a group descriptor could not be read / ext4_init_inode_table
 * failed — in which case the caller drops the request.
 */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct ext4_group_desc *gdp = NULL;
	ext4_group_t group, ngroups;
	struct super_block *sb;
	unsigned long timeout = 0;
	int ret = 0;

	sb = elr->lr_super;
	ngroups = EXT4_SB(sb)->s_groups_count;

	/* Scan forward for the first group whose itable is not yet zeroed. */
	for (group = elr->lr_next_group; group < ngroups; group++) {
		gdp = ext4_get_group_desc(sb, group, NULL);
		if (!gdp) {
			ret = 1;
			break;
		}

		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	/* Every remaining group is already zeroed: nothing left to do. */
	if (group == ngroups)
		ret = 1;

	if (!ret) {
		timeout = jiffies;
		ret = ext4_init_inode_table(sb, group,
					    elr->lr_timeout ? 0 : 1);
		if (elr->lr_timeout == 0) {
			/*
			 * First run for this request: derive the per-request
			 * delay from how long the zeroing itself took,
			 * scaled by s_li_wait_mult (20 if the multiplier
			 * is unset).
			 */
			timeout = jiffies - timeout;
			if (elr->lr_sbi->s_li_wait_mult)
				timeout *= elr->lr_sbi->s_li_wait_mult;
			else
				timeout *= 20;
			elr->lr_timeout = timeout;
		}
		elr->lr_next_sched = jiffies + elr->lr_timeout;
		elr->lr_next_group = group + 1;
	}

	return ret;
}
2655 | |||
2656 | /* | ||
2657 | * Remove lr_request from the list_request and free the | ||
2658 | * request tructure. Should be called with li_list_mtx held | ||
2659 | */ | ||
2660 | static void ext4_remove_li_request(struct ext4_li_request *elr) | ||
2661 | { | ||
2662 | struct ext4_sb_info *sbi; | ||
2663 | |||
2664 | if (!elr) | ||
2665 | return; | ||
2666 | |||
2667 | sbi = elr->lr_sbi; | ||
2668 | |||
2669 | list_del(&elr->lr_request); | ||
2670 | sbi->s_li_request = NULL; | ||
2671 | kfree(elr); | ||
2672 | } | ||
2673 | |||
/*
 * Drop this superblock's lazy-init request, if any, under the global
 * li_list_mtx.  A no-op when the lazyinit machinery was never created.
 *
 * NOTE(review): s_li_request is loaded before li_list_mtx is taken —
 * looks racy against the lazyinit thread removing the request
 * concurrently; confirm against the thread's removal path.
 */
static void ext4_unregister_li_request(struct super_block *sb)
{
	struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;

	if (!ext4_li_info)
		return;

	mutex_lock(&ext4_li_info->li_list_mtx);
	ext4_remove_li_request(elr);
	mutex_unlock(&ext4_li_info->li_list_mtx);
}
2685 | |||
2686 | /* | ||
2687 | * This is the function where ext4lazyinit thread lives. It walks | ||
2688 | * through the request list searching for next scheduled filesystem. | ||
2689 | * When such a fs is found, run the lazy initialization request | ||
2690 | * (ext4_rn_li_request) and keep track of the time spend in this | ||
2691 | * function. Based on that time we compute next schedule time of | ||
2692 | * the request. When walking through the list is complete, compute | ||
2693 | * next waking time and put itself into sleep. | ||
2694 | */ | ||
2695 | static int ext4_lazyinit_thread(void *arg) | ||
2696 | { | ||
2697 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; | ||
2698 | struct list_head *pos, *n; | ||
2699 | struct ext4_li_request *elr; | ||
2700 | unsigned long next_wakeup; | ||
2701 | DEFINE_WAIT(wait); | ||
2702 | int ret; | ||
2703 | |||
2704 | BUG_ON(NULL == eli); | ||
2705 | |||
2706 | eli->li_timer.data = (unsigned long)current; | ||
2707 | eli->li_timer.function = ext4_lazyinode_timeout; | ||
2708 | |||
2709 | eli->li_task = current; | ||
2710 | wake_up(&eli->li_wait_task); | ||
2711 | |||
2712 | cont_thread: | ||
2713 | while (true) { | ||
2714 | next_wakeup = MAX_JIFFY_OFFSET; | ||
2715 | |||
2716 | mutex_lock(&eli->li_list_mtx); | ||
2717 | if (list_empty(&eli->li_request_list)) { | ||
2718 | mutex_unlock(&eli->li_list_mtx); | ||
2719 | goto exit_thread; | ||
2720 | } | ||
2721 | |||
2722 | list_for_each_safe(pos, n, &eli->li_request_list) { | ||
2723 | elr = list_entry(pos, struct ext4_li_request, | ||
2724 | lr_request); | ||
2725 | |||
2726 | if (time_after_eq(jiffies, elr->lr_next_sched)) | ||
2727 | ret = ext4_run_li_request(elr); | ||
2728 | |||
2729 | if (ret) { | ||
2730 | ret = 0; | ||
2731 | ext4_remove_li_request(elr); | ||
2732 | continue; | ||
2733 | } | ||
2734 | |||
2735 | if (time_before(elr->lr_next_sched, next_wakeup)) | ||
2736 | next_wakeup = elr->lr_next_sched; | ||
2737 | } | ||
2738 | mutex_unlock(&eli->li_list_mtx); | ||
2739 | |||
2740 | if (freezing(current)) | ||
2741 | refrigerator(); | ||
2742 | |||
2743 | if (time_after_eq(jiffies, next_wakeup)) { | ||
2744 | cond_resched(); | ||
2745 | continue; | ||
2746 | } | ||
2747 | |||
2748 | eli->li_timer.expires = next_wakeup; | ||
2749 | add_timer(&eli->li_timer); | ||
2750 | prepare_to_wait(&eli->li_wait_daemon, &wait, | ||
2751 | TASK_INTERRUPTIBLE); | ||
2752 | if (time_before(jiffies, next_wakeup)) | ||
2753 | schedule(); | ||
2754 | finish_wait(&eli->li_wait_daemon, &wait); | ||
2755 | } | ||
2756 | |||
2757 | exit_thread: | ||
2758 | /* | ||
2759 | * It looks like the request list is empty, but we need | ||
2760 | * to check it under the li_list_mtx lock, to prevent any | ||
2761 | * additions into it, and of course we should lock ext4_li_mtx | ||
2762 | * to atomically free the list and ext4_li_info, because at | ||
2763 | * this point another ext4 filesystem could be registering | ||
2764 | * new one. | ||
2765 | */ | ||
2766 | mutex_lock(&ext4_li_mtx); | ||
2767 | mutex_lock(&eli->li_list_mtx); | ||
2768 | if (!list_empty(&eli->li_request_list)) { | ||
2769 | mutex_unlock(&eli->li_list_mtx); | ||
2770 | mutex_unlock(&ext4_li_mtx); | ||
2771 | goto cont_thread; | ||
2772 | } | ||
2773 | mutex_unlock(&eli->li_list_mtx); | ||
2774 | del_timer_sync(&ext4_li_info->li_timer); | ||
2775 | eli->li_task = NULL; | ||
2776 | wake_up(&eli->li_wait_task); | ||
2777 | |||
2778 | kfree(ext4_li_info); | ||
2779 | ext4_li_info = NULL; | ||
2780 | mutex_unlock(&ext4_li_mtx); | ||
2781 | |||
2782 | return 0; | ||
2783 | } | ||
2784 | |||
2785 | static void ext4_clear_request_list(void) | ||
2786 | { | ||
2787 | struct list_head *pos, *n; | ||
2788 | struct ext4_li_request *elr; | ||
2789 | |||
2790 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2791 | if (list_empty(&ext4_li_info->li_request_list)) | ||
2792 | return; | ||
2793 | |||
2794 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { | ||
2795 | elr = list_entry(pos, struct ext4_li_request, | ||
2796 | lr_request); | ||
2797 | ext4_remove_li_request(elr); | ||
2798 | } | ||
2799 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2800 | } | ||
2801 | |||
/*
 * Start the "ext4lazyinit" kernel thread.  Called with ext4_li_mtx held
 * (see ext4_register_li_request).  On failure, tears down the request
 * list and the global ext4_li_info.  Returns 0 or a negative errno.
 */
static int ext4_run_lazyinit_thread(void)
{
	struct task_struct *t;

	t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
	if (IS_ERR(t)) {
		int err = PTR_ERR(t);
		/* Thread never started: free all queued requests and state. */
		ext4_clear_request_list();
		del_timer_sync(&ext4_li_info->li_timer);
		kfree(ext4_li_info);
		ext4_li_info = NULL;
		printk(KERN_CRIT "EXT4: error %d creating inode table "
				 "initialization thread\n",
				 err);
		return err;
	}
	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;

	/* Wait until the new thread has published itself in li_task. */
	wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
	return 0;
}
2823 | |||
2824 | /* | ||
2825 | * Check whether it make sense to run itable init. thread or not. | ||
2826 | * If there is at least one uninitialized inode table, return | ||
2827 | * corresponding group number, else the loop goes through all | ||
2828 | * groups and return total number of groups. | ||
2829 | */ | ||
2830 | static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) | ||
2831 | { | ||
2832 | ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; | ||
2833 | struct ext4_group_desc *gdp = NULL; | ||
2834 | |||
2835 | for (group = 0; group < ngroups; group++) { | ||
2836 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
2837 | if (!gdp) | ||
2838 | continue; | ||
2839 | |||
2840 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2841 | break; | ||
2842 | } | ||
2843 | |||
2844 | return group; | ||
2845 | } | ||
2846 | |||
2847 | static int ext4_li_info_new(void) | ||
2848 | { | ||
2849 | struct ext4_lazy_init *eli = NULL; | ||
2850 | |||
2851 | eli = kzalloc(sizeof(*eli), GFP_KERNEL); | ||
2852 | if (!eli) | ||
2853 | return -ENOMEM; | ||
2854 | |||
2855 | eli->li_task = NULL; | ||
2856 | INIT_LIST_HEAD(&eli->li_request_list); | ||
2857 | mutex_init(&eli->li_list_mtx); | ||
2858 | |||
2859 | init_waitqueue_head(&eli->li_wait_daemon); | ||
2860 | init_waitqueue_head(&eli->li_wait_task); | ||
2861 | init_timer(&eli->li_timer); | ||
2862 | eli->li_state |= EXT4_LAZYINIT_QUIT; | ||
2863 | |||
2864 | ext4_li_info = eli; | ||
2865 | |||
2866 | return 0; | ||
2867 | } | ||
2868 | |||
2869 | static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, | ||
2870 | ext4_group_t start) | ||
2871 | { | ||
2872 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2873 | struct ext4_li_request *elr; | ||
2874 | unsigned long rnd; | ||
2875 | |||
2876 | elr = kzalloc(sizeof(*elr), GFP_KERNEL); | ||
2877 | if (!elr) | ||
2878 | return NULL; | ||
2879 | |||
2880 | elr->lr_super = sb; | ||
2881 | elr->lr_sbi = sbi; | ||
2882 | elr->lr_next_group = start; | ||
2883 | |||
2884 | /* | ||
2885 | * Randomize first schedule time of the request to | ||
2886 | * spread the inode table initialization requests | ||
2887 | * better. | ||
2888 | */ | ||
2889 | get_random_bytes(&rnd, sizeof(rnd)); | ||
2890 | elr->lr_next_sched = jiffies + (unsigned long)rnd % | ||
2891 | (EXT4_DEF_LI_MAX_START_DELAY * HZ); | ||
2892 | |||
2893 | return elr; | ||
2894 | } | ||
2895 | |||
2896 | static int ext4_register_li_request(struct super_block *sb, | ||
2897 | ext4_group_t first_not_zeroed) | ||
2898 | { | ||
2899 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2900 | struct ext4_li_request *elr; | ||
2901 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
2902 | int ret; | ||
2903 | |||
2904 | if (sbi->s_li_request != NULL) | ||
2905 | return 0; | ||
2906 | |||
2907 | if (first_not_zeroed == ngroups || | ||
2908 | (sb->s_flags & MS_RDONLY) || | ||
2909 | !test_opt(sb, INIT_INODE_TABLE)) { | ||
2910 | sbi->s_li_request = NULL; | ||
2911 | return 0; | ||
2912 | } | ||
2913 | |||
2914 | if (first_not_zeroed == ngroups) { | ||
2915 | sbi->s_li_request = NULL; | ||
2916 | return 0; | ||
2917 | } | ||
2918 | |||
2919 | elr = ext4_li_request_new(sb, first_not_zeroed); | ||
2920 | if (!elr) | ||
2921 | return -ENOMEM; | ||
2922 | |||
2923 | mutex_lock(&ext4_li_mtx); | ||
2924 | |||
2925 | if (NULL == ext4_li_info) { | ||
2926 | ret = ext4_li_info_new(); | ||
2927 | if (ret) | ||
2928 | goto out; | ||
2929 | } | ||
2930 | |||
2931 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2932 | list_add(&elr->lr_request, &ext4_li_info->li_request_list); | ||
2933 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2934 | |||
2935 | sbi->s_li_request = elr; | ||
2936 | |||
2937 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { | ||
2938 | ret = ext4_run_lazyinit_thread(); | ||
2939 | if (ret) | ||
2940 | goto out; | ||
2941 | } | ||
2942 | out: | ||
2943 | mutex_unlock(&ext4_li_mtx); | ||
2944 | if (ret) | ||
2945 | kfree(elr); | ||
2946 | return ret; | ||
2947 | } | ||
2948 | |||
/*
 * We do not need to lock anything since this is called on
 * module unload.
 */
static void ext4_destroy_lazyinit_thread(void)
{
	/*
	 * If thread exited earlier
	 * there's nothing to be done.
	 */
	if (!ext4_li_info)
		return;

	/* Drain the request list so the thread sees no remaining work. */
	ext4_clear_request_list();

	/*
	 * Keep kicking the thread awake until it notices the empty list,
	 * clears li_task and signals li_wait_task on its way out.
	 */
	while (ext4_li_info->li_task) {
		wake_up(&ext4_li_info->li_wait_daemon);
		wait_event(ext4_li_info->li_wait_task,
			   ext4_li_info->li_task == NULL);
	}
}
2970 | |||
2434 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2971 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2435 | __releases(kernel_lock) | 2972 | __releases(kernel_lock) |
2436 | __acquires(kernel_lock) | 2973 | __acquires(kernel_lock) |
@@ -2448,7 +2985,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2448 | struct inode *root; | 2985 | struct inode *root; |
2449 | char *cp; | 2986 | char *cp; |
2450 | const char *descr; | 2987 | const char *descr; |
2451 | int ret = -EINVAL; | 2988 | int ret = -ENOMEM; |
2452 | int blocksize; | 2989 | int blocksize; |
2453 | unsigned int db_count; | 2990 | unsigned int db_count; |
2454 | unsigned int i; | 2991 | unsigned int i; |
@@ -2456,16 +2993,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2456 | __u64 blocks_count; | 2993 | __u64 blocks_count; |
2457 | int err; | 2994 | int err; |
2458 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 2995 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
2996 | ext4_group_t first_not_zeroed; | ||
2459 | 2997 | ||
2460 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 2998 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
2461 | if (!sbi) | 2999 | if (!sbi) |
2462 | return -ENOMEM; | 3000 | goto out_free_orig; |
2463 | 3001 | ||
2464 | sbi->s_blockgroup_lock = | 3002 | sbi->s_blockgroup_lock = |
2465 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); | 3003 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); |
2466 | if (!sbi->s_blockgroup_lock) { | 3004 | if (!sbi->s_blockgroup_lock) { |
2467 | kfree(sbi); | 3005 | kfree(sbi); |
2468 | return -ENOMEM; | 3006 | goto out_free_orig; |
2469 | } | 3007 | } |
2470 | sb->s_fs_info = sbi; | 3008 | sb->s_fs_info = sbi; |
2471 | sbi->s_mount_opt = 0; | 3009 | sbi->s_mount_opt = 0; |
@@ -2473,15 +3011,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2473 | sbi->s_resgid = EXT4_DEF_RESGID; | 3011 | sbi->s_resgid = EXT4_DEF_RESGID; |
2474 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | 3012 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; |
2475 | sbi->s_sb_block = sb_block; | 3013 | sbi->s_sb_block = sb_block; |
2476 | sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, | 3014 | if (sb->s_bdev->bd_part) |
2477 | sectors[1]); | 3015 | sbi->s_sectors_written_start = |
2478 | 3016 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); | |
2479 | unlock_kernel(); | ||
2480 | 3017 | ||
2481 | /* Cleanup superblock name */ | 3018 | /* Cleanup superblock name */ |
2482 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | 3019 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) |
2483 | *cp = '!'; | 3020 | *cp = '!'; |
2484 | 3021 | ||
3022 | ret = -EINVAL; | ||
2485 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 3023 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
2486 | if (!blocksize) { | 3024 | if (!blocksize) { |
2487 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); | 3025 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); |
@@ -2516,6 +3054,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2516 | 3054 | ||
2517 | /* Set defaults before we parse the mount options */ | 3055 | /* Set defaults before we parse the mount options */ |
2518 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 3056 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
3057 | set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
2519 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 3058 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
2520 | set_opt(sbi->s_mount_opt, DEBUG); | 3059 | set_opt(sbi->s_mount_opt, DEBUG); |
2521 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { | 3060 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { |
@@ -2546,6 +3085,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2546 | set_opt(sbi->s_mount_opt, ERRORS_CONT); | 3085 | set_opt(sbi->s_mount_opt, ERRORS_CONT); |
2547 | else | 3086 | else |
2548 | set_opt(sbi->s_mount_opt, ERRORS_RO); | 3087 | set_opt(sbi->s_mount_opt, ERRORS_RO); |
3088 | if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) | ||
3089 | set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | ||
3090 | if (def_mount_opts & EXT4_DEFM_DISCARD) | ||
3091 | set_opt(sbi->s_mount_opt, DISCARD); | ||
2549 | 3092 | ||
2550 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); | 3093 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); |
2551 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); | 3094 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); |
@@ -2553,15 +3096,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2553 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; | 3096 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; |
2554 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | 3097 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; |
2555 | 3098 | ||
2556 | set_opt(sbi->s_mount_opt, BARRIER); | 3099 | if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) |
3100 | set_opt(sbi->s_mount_opt, BARRIER); | ||
2557 | 3101 | ||
2558 | /* | 3102 | /* |
2559 | * enable delayed allocation by default | 3103 | * enable delayed allocation by default |
2560 | * Use -o nodelalloc to turn it off | 3104 | * Use -o nodelalloc to turn it off |
2561 | */ | 3105 | */ |
2562 | if (!IS_EXT3_SB(sb)) | 3106 | if (!IS_EXT3_SB(sb) && |
3107 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) | ||
2563 | set_opt(sbi->s_mount_opt, DELALLOC); | 3108 | set_opt(sbi->s_mount_opt, DELALLOC); |
2564 | 3109 | ||
3110 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, | ||
3111 | &journal_devnum, &journal_ioprio, NULL, 0)) { | ||
3112 | ext4_msg(sb, KERN_WARNING, | ||
3113 | "failed to parse options in superblock: %s", | ||
3114 | sbi->s_es->s_mount_opts); | ||
3115 | } | ||
2565 | if (!parse_options((char *) data, sb, &journal_devnum, | 3116 | if (!parse_options((char *) data, sb, &journal_devnum, |
2566 | &journal_ioprio, NULL, 0)) | 3117 | &journal_ioprio, NULL, 0)) |
2567 | goto failed_mount; | 3118 | goto failed_mount; |
@@ -2706,15 +3257,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2706 | * Test whether we have more sectors than will fit in sector_t, | 3257 | * Test whether we have more sectors than will fit in sector_t, |
2707 | * and whether the max offset is addressable by the page cache. | 3258 | * and whether the max offset is addressable by the page cache. |
2708 | */ | 3259 | */ |
2709 | if ((ext4_blocks_count(es) > | 3260 | ret = generic_check_addressable(sb->s_blocksize_bits, |
2710 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | 3261 | ext4_blocks_count(es)); |
2711 | (ext4_blocks_count(es) > | 3262 | if (ret) { |
2712 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
2713 | ext4_msg(sb, KERN_ERR, "filesystem" | 3263 | ext4_msg(sb, KERN_ERR, "filesystem" |
2714 | " too large to mount safely on this system"); | 3264 | " too large to mount safely on this system"); |
2715 | if (sizeof(sector_t) < 8) | 3265 | if (sizeof(sector_t) < 8) |
2716 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 3266 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
2717 | ret = -EFBIG; | ||
2718 | goto failed_mount; | 3267 | goto failed_mount; |
2719 | } | 3268 | } |
2720 | 3269 | ||
@@ -2783,7 +3332,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2783 | goto failed_mount2; | 3332 | goto failed_mount2; |
2784 | } | 3333 | } |
2785 | } | 3334 | } |
2786 | if (!ext4_check_descriptors(sb)) { | 3335 | if (!ext4_check_descriptors(sb, &first_not_zeroed)) { |
2787 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); | 3336 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
2788 | goto failed_mount2; | 3337 | goto failed_mount2; |
2789 | } | 3338 | } |
@@ -2912,18 +3461,7 @@ no_journal: | |||
2912 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 3461 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
2913 | goto failed_mount_wq; | 3462 | goto failed_mount_wq; |
2914 | } | 3463 | } |
2915 | if (test_opt(sb, NOBH)) { | 3464 | |
2916 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { | ||
2917 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " | ||
2918 | "its supported only with writeback mode"); | ||
2919 | clear_opt(sbi->s_mount_opt, NOBH); | ||
2920 | } | ||
2921 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
2922 | ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " | ||
2923 | "not supported with nobh mode"); | ||
2924 | goto failed_mount_wq; | ||
2925 | } | ||
2926 | } | ||
2927 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); | 3465 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); |
2928 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3466 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
2929 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3467 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
@@ -3010,11 +3548,15 @@ no_journal: | |||
3010 | ext4_ext_init(sb); | 3548 | ext4_ext_init(sb); |
3011 | err = ext4_mb_init(sb, needs_recovery); | 3549 | err = ext4_mb_init(sb, needs_recovery); |
3012 | if (err) { | 3550 | if (err) { |
3013 | ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", | 3551 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", |
3014 | err); | 3552 | err); |
3015 | goto failed_mount4; | 3553 | goto failed_mount4; |
3016 | } | 3554 | } |
3017 | 3555 | ||
3556 | err = ext4_register_li_request(sb, first_not_zeroed); | ||
3557 | if (err) | ||
3558 | goto failed_mount4; | ||
3559 | |||
3018 | sbi->s_kobj.kset = ext4_kset; | 3560 | sbi->s_kobj.kset = ext4_kset; |
3019 | init_completion(&sbi->s_kobj_unregister); | 3561 | init_completion(&sbi->s_kobj_unregister); |
3020 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, | 3562 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, |
@@ -3043,9 +3585,15 @@ no_journal: | |||
3043 | descr = "out journal"; | 3585 | descr = "out journal"; |
3044 | 3586 | ||
3045 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " | 3587 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " |
3046 | "Opts: %s", descr, orig_data); | 3588 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, |
3589 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); | ||
3590 | |||
3591 | init_timer(&sbi->s_err_report); | ||
3592 | sbi->s_err_report.function = print_daily_error_info; | ||
3593 | sbi->s_err_report.data = (unsigned long) sb; | ||
3594 | if (es->s_error_count) | ||
3595 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ | ||
3047 | 3596 | ||
3048 | lock_kernel(); | ||
3049 | kfree(orig_data); | 3597 | kfree(orig_data); |
3050 | return 0; | 3598 | return 0; |
3051 | 3599 | ||
@@ -3092,7 +3640,7 @@ out_fail: | |||
3092 | sb->s_fs_info = NULL; | 3640 | sb->s_fs_info = NULL; |
3093 | kfree(sbi->s_blockgroup_lock); | 3641 | kfree(sbi->s_blockgroup_lock); |
3094 | kfree(sbi); | 3642 | kfree(sbi); |
3095 | lock_kernel(); | 3643 | out_free_orig: |
3096 | kfree(orig_data); | 3644 | kfree(orig_data); |
3097 | return ret; | 3645 | return ret; |
3098 | } | 3646 | } |
@@ -3110,7 +3658,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
3110 | journal->j_min_batch_time = sbi->s_min_batch_time; | 3658 | journal->j_min_batch_time = sbi->s_min_batch_time; |
3111 | journal->j_max_batch_time = sbi->s_max_batch_time; | 3659 | journal->j_max_batch_time = sbi->s_max_batch_time; |
3112 | 3660 | ||
3113 | spin_lock(&journal->j_state_lock); | 3661 | write_lock(&journal->j_state_lock); |
3114 | if (test_opt(sb, BARRIER)) | 3662 | if (test_opt(sb, BARRIER)) |
3115 | journal->j_flags |= JBD2_BARRIER; | 3663 | journal->j_flags |= JBD2_BARRIER; |
3116 | else | 3664 | else |
@@ -3119,7 +3667,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
3119 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; | 3667 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; |
3120 | else | 3668 | else |
3121 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; | 3669 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; |
3122 | spin_unlock(&journal->j_state_lock); | 3670 | write_unlock(&journal->j_state_lock); |
3123 | } | 3671 | } |
3124 | 3672 | ||
3125 | static journal_t *ext4_get_journal(struct super_block *sb, | 3673 | static journal_t *ext4_get_journal(struct super_block *sb, |
@@ -3327,8 +3875,17 @@ static int ext4_load_journal(struct super_block *sb, | |||
3327 | 3875 | ||
3328 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) | 3876 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) |
3329 | err = jbd2_journal_wipe(journal, !really_read_only); | 3877 | err = jbd2_journal_wipe(journal, !really_read_only); |
3330 | if (!err) | 3878 | if (!err) { |
3879 | char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); | ||
3880 | if (save) | ||
3881 | memcpy(save, ((char *) es) + | ||
3882 | EXT4_S_ERR_START, EXT4_S_ERR_LEN); | ||
3331 | err = jbd2_journal_load(journal); | 3883 | err = jbd2_journal_load(journal); |
3884 | if (save) | ||
3885 | memcpy(((char *) es) + EXT4_S_ERR_START, | ||
3886 | save, EXT4_S_ERR_LEN); | ||
3887 | kfree(save); | ||
3888 | } | ||
3332 | 3889 | ||
3333 | if (err) { | 3890 | if (err) { |
3334 | ext4_msg(sb, KERN_ERR, "error loading journal"); | 3891 | ext4_msg(sb, KERN_ERR, "error loading journal"); |
@@ -3339,7 +3896,7 @@ static int ext4_load_journal(struct super_block *sb, | |||
3339 | EXT4_SB(sb)->s_journal = journal; | 3896 | EXT4_SB(sb)->s_journal = journal; |
3340 | ext4_clear_journal_err(sb, es); | 3897 | ext4_clear_journal_err(sb, es); |
3341 | 3898 | ||
3342 | if (journal_devnum && | 3899 | if (!really_read_only && journal_devnum && |
3343 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 3900 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3344 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 3901 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
3345 | 3902 | ||
@@ -3384,13 +3941,20 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3384 | */ | 3941 | */ |
3385 | if (!(sb->s_flags & MS_RDONLY)) | 3942 | if (!(sb->s_flags & MS_RDONLY)) |
3386 | es->s_wtime = cpu_to_le32(get_seconds()); | 3943 | es->s_wtime = cpu_to_le32(get_seconds()); |
3387 | es->s_kbytes_written = | 3944 | if (sb->s_bdev->bd_part) |
3388 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3945 | es->s_kbytes_written = |
3946 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | ||
3389 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3947 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
3390 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); | 3948 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); |
3391 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 3949 | else |
3950 | es->s_kbytes_written = | ||
3951 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | ||
3952 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) | ||
3953 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | ||
3392 | &EXT4_SB(sb)->s_freeblocks_counter)); | 3954 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3393 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | 3955 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) |
3956 | es->s_free_inodes_count = | ||
3957 | cpu_to_le32(percpu_counter_sum_positive( | ||
3394 | &EXT4_SB(sb)->s_freeinodes_counter)); | 3958 | &EXT4_SB(sb)->s_freeinodes_counter)); |
3395 | sb->s_dirt = 0; | 3959 | sb->s_dirt = 0; |
3396 | BUFFER_TRACE(sbh, "marking dirty"); | 3960 | BUFFER_TRACE(sbh, "marking dirty"); |
@@ -3491,7 +4055,7 @@ int ext4_force_commit(struct super_block *sb) | |||
3491 | 4055 | ||
3492 | journal = EXT4_SB(sb)->s_journal; | 4056 | journal = EXT4_SB(sb)->s_journal; |
3493 | if (journal) { | 4057 | if (journal) { |
3494 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | 4058 | vfs_check_frozen(sb, SB_FREEZE_TRANS); |
3495 | ret = ext4_journal_force_commit(journal); | 4059 | ret = ext4_journal_force_commit(journal); |
3496 | } | 4060 | } |
3497 | 4061 | ||
@@ -3587,8 +4151,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3587 | #endif | 4151 | #endif |
3588 | char *orig_data = kstrdup(data, GFP_KERNEL); | 4152 | char *orig_data = kstrdup(data, GFP_KERNEL); |
3589 | 4153 | ||
3590 | lock_kernel(); | ||
3591 | |||
3592 | /* Store the original options */ | 4154 | /* Store the original options */ |
3593 | lock_super(sb); | 4155 | lock_super(sb); |
3594 | old_sb_flags = sb->s_flags; | 4156 | old_sb_flags = sb->s_flags; |
@@ -3616,7 +4178,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3616 | } | 4178 | } |
3617 | 4179 | ||
3618 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) | 4180 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) |
3619 | ext4_abort(sb, __func__, "Abort forced by user"); | 4181 | ext4_abort(sb, "Abort forced by user"); |
3620 | 4182 | ||
3621 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 4183 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
3622 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); | 4184 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
@@ -3711,6 +4273,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3711 | enable_quota = 1; | 4273 | enable_quota = 1; |
3712 | } | 4274 | } |
3713 | } | 4275 | } |
4276 | |||
4277 | /* | ||
4278 | * Reinitialize lazy itable initialization thread based on | ||
4279 | * current settings | ||
4280 | */ | ||
4281 | if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) | ||
4282 | ext4_unregister_li_request(sb); | ||
4283 | else { | ||
4284 | ext4_group_t first_not_zeroed; | ||
4285 | first_not_zeroed = ext4_has_uninit_itable(sb); | ||
4286 | ext4_register_li_request(sb, first_not_zeroed); | ||
4287 | } | ||
4288 | |||
3714 | ext4_setup_system_zone(sb); | 4289 | ext4_setup_system_zone(sb); |
3715 | if (sbi->s_journal == NULL) | 4290 | if (sbi->s_journal == NULL) |
3716 | ext4_commit_super(sb, 1); | 4291 | ext4_commit_super(sb, 1); |
@@ -3723,7 +4298,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3723 | kfree(old_opts.s_qf_names[i]); | 4298 | kfree(old_opts.s_qf_names[i]); |
3724 | #endif | 4299 | #endif |
3725 | unlock_super(sb); | 4300 | unlock_super(sb); |
3726 | unlock_kernel(); | ||
3727 | if (enable_quota) | 4301 | if (enable_quota) |
3728 | dquot_resume(sb, -1); | 4302 | dquot_resume(sb, -1); |
3729 | 4303 | ||
@@ -3749,7 +4323,6 @@ restore_opts: | |||
3749 | } | 4323 | } |
3750 | #endif | 4324 | #endif |
3751 | unlock_super(sb); | 4325 | unlock_super(sb); |
3752 | unlock_kernel(); | ||
3753 | kfree(orig_data); | 4326 | kfree(orig_data); |
3754 | return err; | 4327 | return err; |
3755 | } | 4328 | } |
@@ -3981,6 +4554,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3981 | return err; | 4554 | return err; |
3982 | } | 4555 | } |
3983 | 4556 | ||
4557 | static int ext4_quota_off(struct super_block *sb, int type) | ||
4558 | { | ||
4559 | /* Force all delayed allocation blocks to be allocated */ | ||
4560 | if (test_opt(sb, DELALLOC)) { | ||
4561 | down_read(&sb->s_umount); | ||
4562 | sync_filesystem(sb); | ||
4563 | up_read(&sb->s_umount); | ||
4564 | } | ||
4565 | |||
4566 | return dquot_quota_off(sb, type); | ||
4567 | } | ||
4568 | |||
3984 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 4569 | /* Read data from quotafile - avoid pagecache and such because we cannot afford |
3985 | * acquiring the locks... As quota files are never truncated and quota code | 4570 | * acquiring the locks... As quota files are never truncated and quota code |
3986 | * itself serializes the operations (and noone else should touch the files) | 4571 | * itself serializes the operations (and noone else should touch the files) |
@@ -4030,7 +4615,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
4030 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 4615 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
4031 | int err = 0; | 4616 | int err = 0; |
4032 | int offset = off & (sb->s_blocksize - 1); | 4617 | int offset = off & (sb->s_blocksize - 1); |
4033 | int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; | ||
4034 | struct buffer_head *bh; | 4618 | struct buffer_head *bh; |
4035 | handle_t *handle = journal_current_handle(); | 4619 | handle_t *handle = journal_current_handle(); |
4036 | 4620 | ||
@@ -4055,24 +4639,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
4055 | bh = ext4_bread(handle, inode, blk, 1, &err); | 4639 | bh = ext4_bread(handle, inode, blk, 1, &err); |
4056 | if (!bh) | 4640 | if (!bh) |
4057 | goto out; | 4641 | goto out; |
4058 | if (journal_quota) { | 4642 | err = ext4_journal_get_write_access(handle, bh); |
4059 | err = ext4_journal_get_write_access(handle, bh); | 4643 | if (err) { |
4060 | if (err) { | 4644 | brelse(bh); |
4061 | brelse(bh); | 4645 | goto out; |
4062 | goto out; | ||
4063 | } | ||
4064 | } | 4646 | } |
4065 | lock_buffer(bh); | 4647 | lock_buffer(bh); |
4066 | memcpy(bh->b_data+offset, data, len); | 4648 | memcpy(bh->b_data+offset, data, len); |
4067 | flush_dcache_page(bh->b_page); | 4649 | flush_dcache_page(bh->b_page); |
4068 | unlock_buffer(bh); | 4650 | unlock_buffer(bh); |
4069 | if (journal_quota) | 4651 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
4070 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
4071 | else { | ||
4072 | /* Always do at least ordered writes for quotas */ | ||
4073 | err = ext4_jbd2_file_inode(handle, inode); | ||
4074 | mark_buffer_dirty(bh); | ||
4075 | } | ||
4076 | brelse(bh); | 4652 | brelse(bh); |
4077 | out: | 4653 | out: |
4078 | if (err) { | 4654 | if (err) { |
@@ -4151,23 +4727,53 @@ static struct file_system_type ext4_fs_type = { | |||
4151 | .fs_flags = FS_REQUIRES_DEV, | 4727 | .fs_flags = FS_REQUIRES_DEV, |
4152 | }; | 4728 | }; |
4153 | 4729 | ||
4154 | static int __init init_ext4_fs(void) | 4730 | int __init ext4_init_feat_adverts(void) |
4731 | { | ||
4732 | struct ext4_features *ef; | ||
4733 | int ret = -ENOMEM; | ||
4734 | |||
4735 | ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); | ||
4736 | if (!ef) | ||
4737 | goto out; | ||
4738 | |||
4739 | ef->f_kobj.kset = ext4_kset; | ||
4740 | init_completion(&ef->f_kobj_unregister); | ||
4741 | ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, | ||
4742 | "features"); | ||
4743 | if (ret) { | ||
4744 | kfree(ef); | ||
4745 | goto out; | ||
4746 | } | ||
4747 | |||
4748 | ext4_feat = ef; | ||
4749 | ret = 0; | ||
4750 | out: | ||
4751 | return ret; | ||
4752 | } | ||
4753 | |||
4754 | static int __init ext4_init_fs(void) | ||
4155 | { | 4755 | { |
4156 | int err; | 4756 | int err; |
4157 | 4757 | ||
4158 | ext4_check_flag_values(); | 4758 | ext4_check_flag_values(); |
4159 | err = init_ext4_system_zone(); | 4759 | err = ext4_init_pageio(); |
4160 | if (err) | 4760 | if (err) |
4161 | return err; | 4761 | return err; |
4762 | err = ext4_init_system_zone(); | ||
4763 | if (err) | ||
4764 | goto out5; | ||
4162 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 4765 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
4163 | if (!ext4_kset) | 4766 | if (!ext4_kset) |
4164 | goto out4; | 4767 | goto out4; |
4165 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 4768 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
4166 | err = init_ext4_mballoc(); | 4769 | |
4770 | err = ext4_init_feat_adverts(); | ||
4771 | |||
4772 | err = ext4_init_mballoc(); | ||
4167 | if (err) | 4773 | if (err) |
4168 | goto out3; | 4774 | goto out3; |
4169 | 4775 | ||
4170 | err = init_ext4_xattr(); | 4776 | err = ext4_init_xattr(); |
4171 | if (err) | 4777 | if (err) |
4172 | goto out2; | 4778 | goto out2; |
4173 | err = init_inodecache(); | 4779 | err = init_inodecache(); |
@@ -4178,38 +4784,46 @@ static int __init init_ext4_fs(void) | |||
4178 | err = register_filesystem(&ext4_fs_type); | 4784 | err = register_filesystem(&ext4_fs_type); |
4179 | if (err) | 4785 | if (err) |
4180 | goto out; | 4786 | goto out; |
4787 | |||
4788 | ext4_li_info = NULL; | ||
4789 | mutex_init(&ext4_li_mtx); | ||
4181 | return 0; | 4790 | return 0; |
4182 | out: | 4791 | out: |
4183 | unregister_as_ext2(); | 4792 | unregister_as_ext2(); |
4184 | unregister_as_ext3(); | 4793 | unregister_as_ext3(); |
4185 | destroy_inodecache(); | 4794 | destroy_inodecache(); |
4186 | out1: | 4795 | out1: |
4187 | exit_ext4_xattr(); | 4796 | ext4_exit_xattr(); |
4188 | out2: | 4797 | out2: |
4189 | exit_ext4_mballoc(); | 4798 | ext4_exit_mballoc(); |
4190 | out3: | 4799 | out3: |
4800 | kfree(ext4_feat); | ||
4191 | remove_proc_entry("fs/ext4", NULL); | 4801 | remove_proc_entry("fs/ext4", NULL); |
4192 | kset_unregister(ext4_kset); | 4802 | kset_unregister(ext4_kset); |
4193 | out4: | 4803 | out4: |
4194 | exit_ext4_system_zone(); | 4804 | ext4_exit_system_zone(); |
4805 | out5: | ||
4806 | ext4_exit_pageio(); | ||
4195 | return err; | 4807 | return err; |
4196 | } | 4808 | } |
4197 | 4809 | ||
4198 | static void __exit exit_ext4_fs(void) | 4810 | static void __exit ext4_exit_fs(void) |
4199 | { | 4811 | { |
4812 | ext4_destroy_lazyinit_thread(); | ||
4200 | unregister_as_ext2(); | 4813 | unregister_as_ext2(); |
4201 | unregister_as_ext3(); | 4814 | unregister_as_ext3(); |
4202 | unregister_filesystem(&ext4_fs_type); | 4815 | unregister_filesystem(&ext4_fs_type); |
4203 | destroy_inodecache(); | 4816 | destroy_inodecache(); |
4204 | exit_ext4_xattr(); | 4817 | ext4_exit_xattr(); |
4205 | exit_ext4_mballoc(); | 4818 | ext4_exit_mballoc(); |
4206 | remove_proc_entry("fs/ext4", NULL); | 4819 | remove_proc_entry("fs/ext4", NULL); |
4207 | kset_unregister(ext4_kset); | 4820 | kset_unregister(ext4_kset); |
4208 | exit_ext4_system_zone(); | 4821 | ext4_exit_system_zone(); |
4822 | ext4_exit_pageio(); | ||
4209 | } | 4823 | } |
4210 | 4824 | ||
4211 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 4825 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
4212 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); | 4826 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); |
4213 | MODULE_LICENSE("GPL"); | 4827 | MODULE_LICENSE("GPL"); |
4214 | module_init(init_ext4_fs) | 4828 | module_init(ext4_init_fs) |
4215 | module_exit(exit_ext4_fs) | 4829 | module_exit(ext4_exit_fs) |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 04338009793a..fa4b899da4b3 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -458,8 +458,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, | |||
458 | 458 | ||
459 | if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { | 459 | if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { |
460 | EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); | 460 | EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); |
461 | sb->s_dirt = 1; | 461 | ext4_handle_dirty_super(handle, sb); |
462 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | ||
463 | } | 462 | } |
464 | } | 463 | } |
465 | 464 | ||
@@ -1418,7 +1417,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh) | |||
1418 | ea_bdebug(bh, "out of memory"); | 1417 | ea_bdebug(bh, "out of memory"); |
1419 | return; | 1418 | return; |
1420 | } | 1419 | } |
1421 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); | 1420 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); |
1422 | if (error) { | 1421 | if (error) { |
1423 | mb_cache_entry_free(ce); | 1422 | mb_cache_entry_free(ce); |
1424 | if (error == -EBUSY) { | 1423 | if (error == -EBUSY) { |
@@ -1490,8 +1489,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, | |||
1490 | return NULL; /* never share */ | 1489 | return NULL; /* never share */ |
1491 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); | 1490 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); |
1492 | again: | 1491 | again: |
1493 | ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, | 1492 | ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, |
1494 | inode->i_sb->s_bdev, hash); | 1493 | hash); |
1495 | while (ce) { | 1494 | while (ce) { |
1496 | struct buffer_head *bh; | 1495 | struct buffer_head *bh; |
1497 | 1496 | ||
@@ -1515,7 +1514,7 @@ again: | |||
1515 | return bh; | 1514 | return bh; |
1516 | } | 1515 | } |
1517 | brelse(bh); | 1516 | brelse(bh); |
1518 | ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); | 1517 | ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); |
1519 | } | 1518 | } |
1520 | return NULL; | 1519 | return NULL; |
1521 | } | 1520 | } |
@@ -1589,18 +1588,16 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, | |||
1589 | #undef BLOCK_HASH_SHIFT | 1588 | #undef BLOCK_HASH_SHIFT |
1590 | 1589 | ||
1591 | int __init | 1590 | int __init |
1592 | init_ext4_xattr(void) | 1591 | ext4_init_xattr(void) |
1593 | { | 1592 | { |
1594 | ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, | 1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); |
1595 | sizeof(struct mb_cache_entry) + | ||
1596 | sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); | ||
1597 | if (!ext4_xattr_cache) | 1594 | if (!ext4_xattr_cache) |
1598 | return -ENOMEM; | 1595 | return -ENOMEM; |
1599 | return 0; | 1596 | return 0; |
1600 | } | 1597 | } |
1601 | 1598 | ||
1602 | void | 1599 | void |
1603 | exit_ext4_xattr(void) | 1600 | ext4_exit_xattr(void) |
1604 | { | 1601 | { |
1605 | if (ext4_xattr_cache) | 1602 | if (ext4_xattr_cache) |
1606 | mb_cache_destroy(ext4_xattr_cache); | 1603 | mb_cache_destroy(ext4_xattr_cache); |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 518e96e43905..1ef16520b950 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *); | |||
83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | 83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, |
84 | struct ext4_inode *raw_inode, handle_t *handle); | 84 | struct ext4_inode *raw_inode, handle_t *handle); |
85 | 85 | ||
86 | extern int init_ext4_xattr(void); | 86 | extern int __init ext4_init_xattr(void); |
87 | extern void exit_ext4_xattr(void); | 87 | extern void ext4_exit_xattr(void); |
88 | 88 | ||
89 | extern const struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb) | |||
121 | { | 121 | { |
122 | } | 122 | } |
123 | 123 | ||
124 | static inline int | 124 | static __init inline int |
125 | init_ext4_xattr(void) | 125 | ext4_init_xattr(void) |
126 | { | 126 | { |
127 | return 0; | 127 | return 0; |
128 | } | 128 | } |
129 | 129 | ||
130 | static inline void | 130 | static inline void |
131 | exit_ext4_xattr(void) | 131 | ext4_exit_xattr(void) |
132 | { | 132 | { |
133 | } | 133 | } |
134 | 134 | ||