aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/acl.c7
-rw-r--r--fs/ext4/balloc.c13
-rw-r--r--fs/ext4/dir.c9
-rw-r--r--fs/ext4/ext4.h123
-rw-r--r--fs/ext4/ext4_extents.h6
-rw-r--r--fs/ext4/ext4_jbd2.c102
-rw-r--r--fs/ext4/ext4_jbd2.h51
-rw-r--r--fs/ext4/extents.c316
-rw-r--r--fs/ext4/extents_status.c631
-rw-r--r--fs/ext4/extents_status.h86
-rw-r--r--fs/ext4/file.c18
-rw-r--r--fs/ext4/hash.c6
-rw-r--r--fs/ext4/ialloc.c29
-rw-r--r--fs/ext4/indirect.c259
-rw-r--r--fs/ext4/inline.c14
-rw-r--r--fs/ext4/inode.c680
-rw-r--r--fs/ext4/ioctl.c15
-rw-r--r--fs/ext4/mballoc.c69
-rw-r--r--fs/ext4/mballoc.h4
-rw-r--r--fs/ext4/migrate.c15
-rw-r--r--fs/ext4/mmp.c4
-rw-r--r--fs/ext4/move_extent.c16
-rw-r--r--fs/ext4/namei.c501
-rw-r--r--fs/ext4/page-io.c85
-rw-r--r--fs/ext4/resize.c36
-rw-r--r--fs/ext4/super.c487
-rw-r--r--fs/ext4/xattr.c23
-rw-r--r--fs/ext4/xattr.h68
29 files changed, 2074 insertions, 1601 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 0a475c881852..987358740cb9 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -41,6 +41,7 @@ config EXT4_USE_FOR_EXT23
41 41
42config EXT4_FS_POSIX_ACL 42config EXT4_FS_POSIX_ACL
43 bool "Ext4 POSIX Access Control Lists" 43 bool "Ext4 POSIX Access Control Lists"
44 depends on EXT4_FS
44 select FS_POSIX_ACL 45 select FS_POSIX_ACL
45 help 46 help
46 POSIX Access Control Lists (ACLs) support permissions for users and 47 POSIX Access Control Lists (ACLs) support permissions for users and
@@ -53,6 +54,7 @@ config EXT4_FS_POSIX_ACL
53 54
54config EXT4_FS_SECURITY 55config EXT4_FS_SECURITY
55 bool "Ext4 Security Labels" 56 bool "Ext4 Security Labels"
57 depends on EXT4_FS
56 help 58 help
57 Security labels support alternative access control models 59 Security labels support alternative access control models
58 implemented by security modules like SELinux. This option 60 implemented by security modules like SELinux. This option
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index e6e0d988439b..39a54a0e9fe4 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -324,8 +324,8 @@ ext4_acl_chmod(struct inode *inode)
324 if (error) 324 if (error)
325 return error; 325 return error;
326retry: 326retry:
327 handle = ext4_journal_start(inode, 327 handle = ext4_journal_start(inode, EXT4_HT_XATTR,
328 EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); 328 ext4_jbd2_credits_xattr(inode));
329 if (IS_ERR(handle)) { 329 if (IS_ERR(handle)) {
330 error = PTR_ERR(handle); 330 error = PTR_ERR(handle);
331 ext4_std_error(inode->i_sb, error); 331 ext4_std_error(inode->i_sb, error);
@@ -422,7 +422,8 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
422 acl = NULL; 422 acl = NULL;
423 423
424retry: 424retry:
425 handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); 425 handle = ext4_journal_start(inode, EXT4_HT_XATTR,
426 ext4_jbd2_credits_xattr(inode));
426 if (IS_ERR(handle)) { 427 if (IS_ERR(handle)) {
427 error = PTR_ERR(handle); 428 error = PTR_ERR(handle);
428 goto release_and_out; 429 goto release_and_out;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index cf1821784a16..2f2e0da1a6b7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -358,7 +358,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
358} 358}
359 359
360/** 360/**
361 * ext4_read_block_bitmap() 361 * ext4_read_block_bitmap_nowait()
362 * @sb: super block 362 * @sb: super block
363 * @block_group: given block group 363 * @block_group: given block group
364 * 364 *
@@ -457,6 +457,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
457 struct buffer_head *bh; 457 struct buffer_head *bh;
458 458
459 bh = ext4_read_block_bitmap_nowait(sb, block_group); 459 bh = ext4_read_block_bitmap_nowait(sb, block_group);
460 if (!bh)
461 return NULL;
460 if (ext4_wait_block_bitmap(sb, block_group, bh)) { 462 if (ext4_wait_block_bitmap(sb, block_group, bh)) {
461 put_bh(bh); 463 put_bh(bh);
462 return NULL; 464 return NULL;
@@ -482,11 +484,16 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
482 484
483 free_clusters = percpu_counter_read_positive(fcc); 485 free_clusters = percpu_counter_read_positive(fcc);
484 dirty_clusters = percpu_counter_read_positive(dcc); 486 dirty_clusters = percpu_counter_read_positive(dcc);
485 root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es)); 487
488 /*
489 * r_blocks_count should always be multiple of the cluster ratio so
490 * we are safe to do a plane bit shift only.
491 */
492 root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
486 493
487 if (free_clusters - (nclusters + root_clusters + dirty_clusters) < 494 if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
488 EXT4_FREECLUSTERS_WATERMARK) { 495 EXT4_FREECLUSTERS_WATERMARK) {
489 free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc)); 496 free_clusters = percpu_counter_sum_positive(fcc);
490 dirty_clusters = percpu_counter_sum_positive(dcc); 497 dirty_clusters = percpu_counter_sum_positive(dcc);
491 } 498 }
492 /* Check whether we have space after accounting for current 499 /* Check whether we have space after accounting for current
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 80a28b297279..6dda04f05ef4 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -110,7 +110,7 @@ static int ext4_readdir(struct file *filp,
110 int i, stored; 110 int i, stored;
111 struct ext4_dir_entry_2 *de; 111 struct ext4_dir_entry_2 *de;
112 int err; 112 int err;
113 struct inode *inode = filp->f_path.dentry->d_inode; 113 struct inode *inode = file_inode(filp);
114 struct super_block *sb = inode->i_sb; 114 struct super_block *sb = inode->i_sb;
115 int ret = 0; 115 int ret = 0;
116 int dir_has_error = 0; 116 int dir_has_error = 0;
@@ -133,7 +133,7 @@ static int ext4_readdir(struct file *filp,
133 * We don't set the inode dirty flag since it's not 133 * We don't set the inode dirty flag since it's not
134 * critical that it get flushed back to the disk. 134 * critical that it get flushed back to the disk.
135 */ 135 */
136 ext4_clear_inode_flag(filp->f_path.dentry->d_inode, 136 ext4_clear_inode_flag(file_inode(filp),
137 EXT4_INODE_INDEX); 137 EXT4_INODE_INDEX);
138 } 138 }
139 stored = 0; 139 stored = 0;
@@ -185,6 +185,7 @@ static int ext4_readdir(struct file *filp,
185 "at offset %llu", 185 "at offset %llu",
186 (unsigned long long)filp->f_pos); 186 (unsigned long long)filp->f_pos);
187 filp->f_pos += sb->s_blocksize - offset; 187 filp->f_pos += sb->s_blocksize - offset;
188 brelse(bh);
188 continue; 189 continue;
189 } 190 }
190 set_buffer_verified(bh); 191 set_buffer_verified(bh);
@@ -494,7 +495,7 @@ static int call_filldir(struct file *filp, void *dirent,
494{ 495{
495 struct dir_private_info *info = filp->private_data; 496 struct dir_private_info *info = filp->private_data;
496 loff_t curr_pos; 497 loff_t curr_pos;
497 struct inode *inode = filp->f_path.dentry->d_inode; 498 struct inode *inode = file_inode(filp);
498 struct super_block *sb; 499 struct super_block *sb;
499 int error; 500 int error;
500 501
@@ -526,7 +527,7 @@ static int ext4_dx_readdir(struct file *filp,
526 void *dirent, filldir_t filldir) 527 void *dirent, filldir_t filldir)
527{ 528{
528 struct dir_private_info *info = filp->private_data; 529 struct dir_private_info *info = filp->private_data;
529 struct inode *inode = filp->f_path.dentry->d_inode; 530 struct inode *inode = file_inode(filp);
530 struct fname *fname; 531 struct fname *fname;
531 int ret; 532 int ret;
532 533
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8462eb3c33aa..6e16c1867959 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -194,8 +194,7 @@ struct mpage_da_data {
194 */ 194 */
195#define EXT4_IO_END_UNWRITTEN 0x0001 195#define EXT4_IO_END_UNWRITTEN 0x0001
196#define EXT4_IO_END_ERROR 0x0002 196#define EXT4_IO_END_ERROR 0x0002
197#define EXT4_IO_END_QUEUED 0x0004 197#define EXT4_IO_END_DIRECT 0x0004
198#define EXT4_IO_END_DIRECT 0x0008
199 198
200struct ext4_io_page { 199struct ext4_io_page {
201 struct page *p_page; 200 struct page *p_page;
@@ -215,10 +214,8 @@ typedef struct ext4_io_end {
215 struct list_head list; /* per-file finished IO list */ 214 struct list_head list; /* per-file finished IO list */
216 struct inode *inode; /* file being written to */ 215 struct inode *inode; /* file being written to */
217 unsigned int flag; /* unwritten or not */ 216 unsigned int flag; /* unwritten or not */
218 struct page *page; /* for writepage() path */
219 loff_t offset; /* offset in the file */ 217 loff_t offset; /* offset in the file */
220 ssize_t size; /* size of the extent */ 218 ssize_t size; /* size of the extent */
221 struct work_struct work; /* data work queue */
222 struct kiocb *iocb; /* iocb struct for AIO */ 219 struct kiocb *iocb; /* iocb struct for AIO */
223 int result; /* error value for AIO */ 220 int result; /* error value for AIO */
224 int num_io_pages; /* for writepages() */ 221 int num_io_pages; /* for writepages() */
@@ -582,6 +579,8 @@ enum {
582#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 579#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
583 /* Do not take i_data_sem locking in ext4_map_blocks */ 580 /* Do not take i_data_sem locking in ext4_map_blocks */
584#define EXT4_GET_BLOCKS_NO_LOCK 0x0100 581#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
582 /* Do not put hole in extent cache */
583#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
585 584
586/* 585/*
587 * Flags used by ext4_free_blocks 586 * Flags used by ext4_free_blocks
@@ -810,17 +809,6 @@ do { \
810 809
811#endif /* defined(__KERNEL__) || defined(__linux__) */ 810#endif /* defined(__KERNEL__) || defined(__linux__) */
812 811
813/*
814 * storage for cached extent
815 * If ec_len == 0, then the cache is invalid.
816 * If ec_start == 0, then the cache represents a gap (null mapping)
817 */
818struct ext4_ext_cache {
819 ext4_fsblk_t ec_start;
820 ext4_lblk_t ec_block;
821 __u32 ec_len; /* must be 32bit to return holes */
822};
823
824#include "extents_status.h" 812#include "extents_status.h"
825 813
826/* 814/*
@@ -887,7 +875,6 @@ struct ext4_inode_info {
887 struct inode vfs_inode; 875 struct inode vfs_inode;
888 struct jbd2_inode *jinode; 876 struct jbd2_inode *jinode;
889 877
890 struct ext4_ext_cache i_cached_extent;
891 /* 878 /*
892 * File creation time. Its function is same as that of 879 * File creation time. Its function is same as that of
893 * struct timespec i_{a,c,m}time in the generic inode. 880 * struct timespec i_{a,c,m}time in the generic inode.
@@ -901,6 +888,8 @@ struct ext4_inode_info {
901 /* extents status tree */ 888 /* extents status tree */
902 struct ext4_es_tree i_es_tree; 889 struct ext4_es_tree i_es_tree;
903 rwlock_t i_es_lock; 890 rwlock_t i_es_lock;
891 struct list_head i_es_lru;
892 unsigned int i_es_lru_nr; /* protected by i_es_lock */
904 893
905 /* ialloc */ 894 /* ialloc */
906 ext4_group_t i_last_alloc_group; 895 ext4_group_t i_last_alloc_group;
@@ -930,6 +919,7 @@ struct ext4_inode_info {
930 spinlock_t i_completed_io_lock; 919 spinlock_t i_completed_io_lock;
931 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 920 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
932 atomic_t i_unwritten; /* Nr. of inflight conversions pending */ 921 atomic_t i_unwritten; /* Nr. of inflight conversions pending */
922 struct work_struct i_unwritten_work; /* deferred extent conversion */
933 923
934 spinlock_t i_block_reservation_lock; 924 spinlock_t i_block_reservation_lock;
935 925
@@ -985,7 +975,6 @@ struct ext4_inode_info {
985#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ 975#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
986#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 976#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
987#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 977#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
988#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
989#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 978#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
990#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 979#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
991#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 980#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
@@ -1316,6 +1305,11 @@ struct ext4_sb_info {
1316 1305
1317 /* Precomputed FS UUID checksum for seeding other checksums */ 1306 /* Precomputed FS UUID checksum for seeding other checksums */
1318 __u32 s_csum_seed; 1307 __u32 s_csum_seed;
1308
1309 /* Reclaim extents from extent status tree */
1310 struct shrinker s_es_shrinker;
1311 struct list_head s_es_lru;
1312 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
1319}; 1313};
1320 1314
1321static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1315static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2007,9 +2001,20 @@ extern int ext4fs_dirhash(const char *name, int len, struct
2007 dx_hash_info *hinfo); 2001 dx_hash_info *hinfo);
2008 2002
2009/* ialloc.c */ 2003/* ialloc.c */
2010extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t, 2004extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
2011 const struct qstr *qstr, __u32 goal, 2005 const struct qstr *qstr, __u32 goal,
2012 uid_t *owner); 2006 uid_t *owner, int handle_type,
2007 unsigned int line_no, int nblocks);
2008
2009#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
2010 __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
2011 0, 0, 0)
2012#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
2013 type, nblocks) \
2014 __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
2015 (type), __LINE__, (nblocks))
2016
2017
2013extern void ext4_free_inode(handle_t *, struct inode *); 2018extern void ext4_free_inode(handle_t *, struct inode *);
2014extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 2019extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
2015extern unsigned long ext4_count_free_inodes(struct super_block *); 2020extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -2103,6 +2108,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
2103extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2108extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2104extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); 2109extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
2105extern void ext4_ind_truncate(struct inode *inode); 2110extern void ext4_ind_truncate(struct inode *inode);
2111extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
2106 2112
2107/* ioctl.c */ 2113/* ioctl.c */
2108extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 2114extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2151,6 +2157,8 @@ extern void *ext4_kvzalloc(size_t size, gfp_t flags);
2151extern void ext4_kvfree(void *ptr); 2157extern void ext4_kvfree(void *ptr);
2152extern int ext4_alloc_flex_bg_array(struct super_block *sb, 2158extern int ext4_alloc_flex_bg_array(struct super_block *sb,
2153 ext4_group_t ngroup); 2159 ext4_group_t ngroup);
2160extern const char *ext4_decode_error(struct super_block *sb, int errno,
2161 char nbuf[16]);
2154extern __printf(4, 5) 2162extern __printf(4, 5)
2155void __ext4_error(struct super_block *, const char *, unsigned int, 2163void __ext4_error(struct super_block *, const char *, unsigned int,
2156 const char *, ...); 2164 const char *, ...);
@@ -2227,6 +2235,8 @@ extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group,
2227 struct ext4_group_desc *gdp); 2235 struct ext4_group_desc *gdp);
2228extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group, 2236extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
2229 struct ext4_group_desc *gdp); 2237 struct ext4_group_desc *gdp);
2238extern int ext4_register_li_request(struct super_block *sb,
2239 ext4_group_t first_not_zeroed);
2230 2240
2231static inline int ext4_has_group_desc_csum(struct super_block *sb) 2241static inline int ext4_has_group_desc_csum(struct super_block *sb)
2232{ 2242{
@@ -2454,6 +2464,75 @@ extern const struct file_operations ext4_file_operations;
2454extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2464extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2455extern void ext4_unwritten_wait(struct inode *inode); 2465extern void ext4_unwritten_wait(struct inode *inode);
2456 2466
2467/* inline.c */
2468extern int ext4_has_inline_data(struct inode *inode);
2469extern int ext4_get_inline_size(struct inode *inode);
2470extern int ext4_get_max_inline_size(struct inode *inode);
2471extern int ext4_find_inline_data_nolock(struct inode *inode);
2472extern void ext4_write_inline_data(struct inode *inode,
2473 struct ext4_iloc *iloc,
2474 void *buffer, loff_t pos,
2475 unsigned int len);
2476extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
2477 unsigned int len);
2478extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
2479 unsigned int len);
2480extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
2481
2482extern int ext4_readpage_inline(struct inode *inode, struct page *page);
2483extern int ext4_try_to_write_inline_data(struct address_space *mapping,
2484 struct inode *inode,
2485 loff_t pos, unsigned len,
2486 unsigned flags,
2487 struct page **pagep);
2488extern int ext4_write_inline_data_end(struct inode *inode,
2489 loff_t pos, unsigned len,
2490 unsigned copied,
2491 struct page *page);
2492extern struct buffer_head *
2493ext4_journalled_write_inline_data(struct inode *inode,
2494 unsigned len,
2495 struct page *page);
2496extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
2497 struct inode *inode,
2498 loff_t pos, unsigned len,
2499 unsigned flags,
2500 struct page **pagep,
2501 void **fsdata);
2502extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
2503 unsigned len, unsigned copied,
2504 struct page *page);
2505extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
2506 struct inode *inode);
2507extern int ext4_try_create_inline_dir(handle_t *handle,
2508 struct inode *parent,
2509 struct inode *inode);
2510extern int ext4_read_inline_dir(struct file *filp,
2511 void *dirent, filldir_t filldir,
2512 int *has_inline_data);
2513extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
2514 const struct qstr *d_name,
2515 struct ext4_dir_entry_2 **res_dir,
2516 int *has_inline_data);
2517extern int ext4_delete_inline_entry(handle_t *handle,
2518 struct inode *dir,
2519 struct ext4_dir_entry_2 *de_del,
2520 struct buffer_head *bh,
2521 int *has_inline_data);
2522extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
2523extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
2524 struct ext4_dir_entry_2 **parent_de,
2525 int *retval);
2526extern int ext4_inline_data_fiemap(struct inode *inode,
2527 struct fiemap_extent_info *fieinfo,
2528 int *has_inline);
2529extern int ext4_try_to_evict_inline_data(handle_t *handle,
2530 struct inode *inode,
2531 int needed);
2532extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
2533
2534extern int ext4_convert_inline_data(struct inode *inode);
2535
2457/* namei.c */ 2536/* namei.c */
2458extern const struct inode_operations ext4_dir_inode_operations; 2537extern const struct inode_operations ext4_dir_inode_operations;
2459extern const struct inode_operations ext4_special_inode_operations; 2538extern const struct inode_operations ext4_special_inode_operations;
@@ -2520,6 +2599,9 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
2520 struct ext4_ext_path *); 2599 struct ext4_ext_path *);
2521extern void ext4_ext_drop_refs(struct ext4_ext_path *); 2600extern void ext4_ext_drop_refs(struct ext4_ext_path *);
2522extern int ext4_ext_check_inode(struct inode *inode); 2601extern int ext4_ext_check_inode(struct inode *inode);
2602extern int ext4_find_delalloc_range(struct inode *inode,
2603 ext4_lblk_t lblk_start,
2604 ext4_lblk_t lblk_end);
2523extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); 2605extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2524extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2606extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2525 __u64 start, __u64 len); 2607 __u64 start, __u64 len);
@@ -2537,6 +2619,7 @@ extern void ext4_exit_pageio(void);
2537extern void ext4_ioend_wait(struct inode *); 2619extern void ext4_ioend_wait(struct inode *);
2538extern void ext4_free_io_end(ext4_io_end_t *io); 2620extern void ext4_free_io_end(ext4_io_end_t *io);
2539extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2621extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2622extern void ext4_end_io_work(struct work_struct *work);
2540extern void ext4_io_submit(struct ext4_io_submit *io); 2623extern void ext4_io_submit(struct ext4_io_submit *io);
2541extern int ext4_bio_write_page(struct ext4_io_submit *io, 2624extern int ext4_bio_write_page(struct ext4_io_submit *io,
2542 struct page *page, 2625 struct page *page,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 487fda12bc00..8643ff5bbeb7 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -193,12 +193,6 @@ static inline unsigned short ext_depth(struct inode *inode)
193 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); 193 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
194} 194}
195 195
196static inline void
197ext4_ext_invalidate_cache(struct inode *inode)
198{
199 EXT4_I(inode)->i_cached_extent.ec_len = 0;
200}
201
202static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) 196static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
203{ 197{
204 /* We can not have an uninitialized extent of zero length! */ 198 /* We can not have an uninitialized extent of zero length! */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b4323ba846b5..7058975e3a55 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,6 +6,108 @@
6 6
7#include <trace/events/ext4.h> 7#include <trace/events/ext4.h>
8 8
9/* Just increment the non-pointer handle value */
10static handle_t *ext4_get_nojournal(void)
11{
12 handle_t *handle = current->journal_info;
13 unsigned long ref_cnt = (unsigned long)handle;
14
15 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
16
17 ref_cnt++;
18 handle = (handle_t *)ref_cnt;
19
20 current->journal_info = handle;
21 return handle;
22}
23
24
25/* Decrement the non-pointer handle value */
26static void ext4_put_nojournal(handle_t *handle)
27{
28 unsigned long ref_cnt = (unsigned long)handle;
29
30 BUG_ON(ref_cnt == 0);
31
32 ref_cnt--;
33 handle = (handle_t *)ref_cnt;
34
35 current->journal_info = handle;
36}
37
38/*
39 * Wrappers for jbd2_journal_start/end.
40 */
41handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
42 int type, int nblocks)
43{
44 journal_t *journal;
45
46 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
47 if (sb->s_flags & MS_RDONLY)
48 return ERR_PTR(-EROFS);
49
50 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
51 journal = EXT4_SB(sb)->s_journal;
52 if (!journal)
53 return ext4_get_nojournal();
54 /*
55 * Special case here: if the journal has aborted behind our
56 * backs (eg. EIO in the commit thread), then we still need to
57 * take the FS itself readonly cleanly.
58 */
59 if (is_journal_aborted(journal)) {
60 ext4_abort(sb, "Detected aborted journal");
61 return ERR_PTR(-EROFS);
62 }
63 return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line);
64}
65
66int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
67{
68 struct super_block *sb;
69 int err;
70 int rc;
71
72 if (!ext4_handle_valid(handle)) {
73 ext4_put_nojournal(handle);
74 return 0;
75 }
76 sb = handle->h_transaction->t_journal->j_private;
77 err = handle->h_err;
78 rc = jbd2_journal_stop(handle);
79
80 if (!err)
81 err = rc;
82 if (err)
83 __ext4_std_error(sb, where, line, err);
84 return err;
85}
86
87void ext4_journal_abort_handle(const char *caller, unsigned int line,
88 const char *err_fn, struct buffer_head *bh,
89 handle_t *handle, int err)
90{
91 char nbuf[16];
92 const char *errstr = ext4_decode_error(NULL, err, nbuf);
93
94 BUG_ON(!ext4_handle_valid(handle));
95
96 if (bh)
97 BUFFER_TRACE(bh, "abort");
98
99 if (!handle->h_err)
100 handle->h_err = err;
101
102 if (is_handle_aborted(handle))
103 return;
104
105 printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
106 caller, line, errstr, err_fn);
107
108 jbd2_journal_abort_handle(handle);
109}
110
9int __ext4_journal_get_write_access(const char *where, unsigned int line, 111int __ext4_journal_get_write_access(const char *where, unsigned int line,
10 handle_t *handle, struct buffer_head *bh) 112 handle_t *handle, struct buffer_head *bh)
11{ 113{
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 7177f9b21cb2..4c216b1bf20c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -59,12 +59,6 @@
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ 59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) 60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
61 61
62/* Delete operations potentially hit one directory's namespace plus an
63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
64 * generous. We can grow the delete transaction later if necessary. */
65
66#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
67
68/* Define an arbitrary limit for the amount of data we will anticipate 62/* Define an arbitrary limit for the amount of data we will anticipate
69 * writing to any given transaction. For unbounded transactions such as 63 * writing to any given transaction. For unbounded transactions such as
70 * write(2) and truncate(2) we can write more than this, but we always 64 * write(2) and truncate(2) we can write more than this, but we always
@@ -110,6 +104,36 @@
110#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 104#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
111#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 105#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
112 106
107static inline int ext4_jbd2_credits_xattr(struct inode *inode)
108{
109 int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
110
111 /*
112 * In case of inline data, we may push out the data to a block,
113 * so we need to reserve credits for this eventuality
114 */
115 if (ext4_has_inline_data(inode))
116 credits += ext4_writepage_trans_blocks(inode) + 1;
117 return credits;
118}
119
120
121/*
122 * Ext4 handle operation types -- for logging purposes
123 */
124#define EXT4_HT_MISC 0
125#define EXT4_HT_INODE 1
126#define EXT4_HT_WRITE_PAGE 2
127#define EXT4_HT_MAP_BLOCKS 3
128#define EXT4_HT_DIR 4
129#define EXT4_HT_TRUNCATE 5
130#define EXT4_HT_QUOTA 6
131#define EXT4_HT_RESIZE 7
132#define EXT4_HT_MIGRATE 8
133#define EXT4_HT_MOVE_EXTENTS 9
134#define EXT4_HT_XATTR 10
135#define EXT4_HT_MAX 11
136
113/** 137/**
114 * struct ext4_journal_cb_entry - Base structure for callback information. 138 * struct ext4_journal_cb_entry - Base structure for callback information.
115 * 139 *
@@ -234,7 +258,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
234#define ext4_handle_dirty_super(handle, sb) \ 258#define ext4_handle_dirty_super(handle, sb) \
235 __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) 259 __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
236 260
237handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 261handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
262 int type, int nblocks);
238int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); 263int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
239 264
240#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) 265#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -268,9 +293,17 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
268 return 1; 293 return 1;
269} 294}
270 295
271static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) 296#define ext4_journal_start_sb(sb, type, nblocks) \
297 __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks))
298
299#define ext4_journal_start(inode, type, nblocks) \
300 __ext4_journal_start((inode), __LINE__, (type), (nblocks))
301
302static inline handle_t *__ext4_journal_start(struct inode *inode,
303 unsigned int line, int type,
304 int nblocks)
272{ 305{
273 return ext4_journal_start_sb(inode->i_sb, nblocks); 306 return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks);
274} 307}
275 308
276#define ext4_journal_stop(handle) \ 309#define ext4_journal_stop(handle) \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5ae1674ec12f..28dd8eeea6a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -112,7 +112,7 @@ static int ext4_split_extent_at(handle_t *handle,
112 int flags); 112 int flags);
113 113
114static int ext4_find_delayed_extent(struct inode *inode, 114static int ext4_find_delayed_extent(struct inode *inode,
115 struct ext4_ext_cache *newex); 115 struct extent_status *newes);
116 116
117static int ext4_ext_truncate_extend_restart(handle_t *handle, 117static int ext4_ext_truncate_extend_restart(handle_t *handle,
118 struct inode *inode, 118 struct inode *inode,
@@ -714,7 +714,6 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
714 eh->eh_magic = EXT4_EXT_MAGIC; 714 eh->eh_magic = EXT4_EXT_MAGIC;
715 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); 715 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
716 ext4_mark_inode_dirty(handle, inode); 716 ext4_mark_inode_dirty(handle, inode);
717 ext4_ext_invalidate_cache(inode);
718 return 0; 717 return 0;
719} 718}
720 719
@@ -725,6 +724,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
725 struct ext4_extent_header *eh; 724 struct ext4_extent_header *eh;
726 struct buffer_head *bh; 725 struct buffer_head *bh;
727 short int depth, i, ppos = 0, alloc = 0; 726 short int depth, i, ppos = 0, alloc = 0;
727 int ret;
728 728
729 eh = ext_inode_hdr(inode); 729 eh = ext_inode_hdr(inode);
730 depth = ext_depth(inode); 730 depth = ext_depth(inode);
@@ -752,12 +752,15 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
752 path[ppos].p_ext = NULL; 752 path[ppos].p_ext = NULL;
753 753
754 bh = sb_getblk(inode->i_sb, path[ppos].p_block); 754 bh = sb_getblk(inode->i_sb, path[ppos].p_block);
755 if (unlikely(!bh)) 755 if (unlikely(!bh)) {
756 ret = -ENOMEM;
756 goto err; 757 goto err;
758 }
757 if (!bh_uptodate_or_lock(bh)) { 759 if (!bh_uptodate_or_lock(bh)) {
758 trace_ext4_ext_load_extent(inode, block, 760 trace_ext4_ext_load_extent(inode, block,
759 path[ppos].p_block); 761 path[ppos].p_block);
760 if (bh_submit_read(bh) < 0) { 762 ret = bh_submit_read(bh);
763 if (ret < 0) {
761 put_bh(bh); 764 put_bh(bh);
762 goto err; 765 goto err;
763 } 766 }
@@ -768,13 +771,15 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
768 put_bh(bh); 771 put_bh(bh);
769 EXT4_ERROR_INODE(inode, 772 EXT4_ERROR_INODE(inode,
770 "ppos %d > depth %d", ppos, depth); 773 "ppos %d > depth %d", ppos, depth);
774 ret = -EIO;
771 goto err; 775 goto err;
772 } 776 }
773 path[ppos].p_bh = bh; 777 path[ppos].p_bh = bh;
774 path[ppos].p_hdr = eh; 778 path[ppos].p_hdr = eh;
775 i--; 779 i--;
776 780
777 if (ext4_ext_check_block(inode, eh, i, bh)) 781 ret = ext4_ext_check_block(inode, eh, i, bh);
782 if (ret < 0)
778 goto err; 783 goto err;
779 } 784 }
780 785
@@ -796,7 +801,7 @@ err:
796 ext4_ext_drop_refs(path); 801 ext4_ext_drop_refs(path);
797 if (alloc) 802 if (alloc)
798 kfree(path); 803 kfree(path);
799 return ERR_PTR(-EIO); 804 return ERR_PTR(ret);
800} 805}
801 806
802/* 807/*
@@ -950,8 +955,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
950 goto cleanup; 955 goto cleanup;
951 } 956 }
952 bh = sb_getblk(inode->i_sb, newblock); 957 bh = sb_getblk(inode->i_sb, newblock);
953 if (!bh) { 958 if (unlikely(!bh)) {
954 err = -EIO; 959 err = -ENOMEM;
955 goto cleanup; 960 goto cleanup;
956 } 961 }
957 lock_buffer(bh); 962 lock_buffer(bh);
@@ -1023,8 +1028,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
1023 oldblock = newblock; 1028 oldblock = newblock;
1024 newblock = ablocks[--a]; 1029 newblock = ablocks[--a];
1025 bh = sb_getblk(inode->i_sb, newblock); 1030 bh = sb_getblk(inode->i_sb, newblock);
1026 if (!bh) { 1031 if (unlikely(!bh)) {
1027 err = -EIO; 1032 err = -ENOMEM;
1028 goto cleanup; 1033 goto cleanup;
1029 } 1034 }
1030 lock_buffer(bh); 1035 lock_buffer(bh);
@@ -1136,11 +1141,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1136 return err; 1141 return err;
1137 1142
1138 bh = sb_getblk(inode->i_sb, newblock); 1143 bh = sb_getblk(inode->i_sb, newblock);
1139 if (!bh) { 1144 if (unlikely(!bh))
1140 err = -EIO; 1145 return -ENOMEM;
1141 ext4_std_error(inode->i_sb, err);
1142 return err;
1143 }
1144 lock_buffer(bh); 1146 lock_buffer(bh);
1145 1147
1146 err = ext4_journal_get_create_access(handle, bh); 1148 err = ext4_journal_get_create_access(handle, bh);
@@ -1960,7 +1962,6 @@ cleanup:
1960 ext4_ext_drop_refs(npath); 1962 ext4_ext_drop_refs(npath);
1961 kfree(npath); 1963 kfree(npath);
1962 } 1964 }
1963 ext4_ext_invalidate_cache(inode);
1964 return err; 1965 return err;
1965} 1966}
1966 1967
@@ -1969,8 +1970,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
1969 struct fiemap_extent_info *fieinfo) 1970 struct fiemap_extent_info *fieinfo)
1970{ 1971{
1971 struct ext4_ext_path *path = NULL; 1972 struct ext4_ext_path *path = NULL;
1972 struct ext4_ext_cache newex;
1973 struct ext4_extent *ex; 1973 struct ext4_extent *ex;
1974 struct extent_status es;
1974 ext4_lblk_t next, next_del, start = 0, end = 0; 1975 ext4_lblk_t next, next_del, start = 0, end = 0;
1975 ext4_lblk_t last = block + num; 1976 ext4_lblk_t last = block + num;
1976 int exists, depth = 0, err = 0; 1977 int exists, depth = 0, err = 0;
@@ -2044,37 +2045,47 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2044 BUG_ON(end <= start); 2045 BUG_ON(end <= start);
2045 2046
2046 if (!exists) { 2047 if (!exists) {
2047 newex.ec_block = start; 2048 es.es_lblk = start;
2048 newex.ec_len = end - start; 2049 es.es_len = end - start;
2049 newex.ec_start = 0; 2050 es.es_pblk = 0;
2050 } else { 2051 } else {
2051 newex.ec_block = le32_to_cpu(ex->ee_block); 2052 es.es_lblk = le32_to_cpu(ex->ee_block);
2052 newex.ec_len = ext4_ext_get_actual_len(ex); 2053 es.es_len = ext4_ext_get_actual_len(ex);
2053 newex.ec_start = ext4_ext_pblock(ex); 2054 es.es_pblk = ext4_ext_pblock(ex);
2054 if (ext4_ext_is_uninitialized(ex)) 2055 if (ext4_ext_is_uninitialized(ex))
2055 flags |= FIEMAP_EXTENT_UNWRITTEN; 2056 flags |= FIEMAP_EXTENT_UNWRITTEN;
2056 } 2057 }
2057 2058
2058 /* 2059 /*
2059 * Find delayed extent and update newex accordingly. We call 2060 * Find delayed extent and update es accordingly. We call
2060 * it even in !exists case to find out whether newex is the 2061 * it even in !exists case to find out whether es is the
2061 * last existing extent or not. 2062 * last existing extent or not.
2062 */ 2063 */
2063 next_del = ext4_find_delayed_extent(inode, &newex); 2064 next_del = ext4_find_delayed_extent(inode, &es);
2064 if (!exists && next_del) { 2065 if (!exists && next_del) {
2065 exists = 1; 2066 exists = 1;
2066 flags |= FIEMAP_EXTENT_DELALLOC; 2067 flags |= FIEMAP_EXTENT_DELALLOC;
2067 } 2068 }
2068 up_read(&EXT4_I(inode)->i_data_sem); 2069 up_read(&EXT4_I(inode)->i_data_sem);
2069 2070
2070 if (unlikely(newex.ec_len == 0)) { 2071 if (unlikely(es.es_len == 0)) {
2071 EXT4_ERROR_INODE(inode, "newex.ec_len == 0"); 2072 EXT4_ERROR_INODE(inode, "es.es_len == 0");
2072 err = -EIO; 2073 err = -EIO;
2073 break; 2074 break;
2074 } 2075 }
2075 2076
2076 /* This is possible iff next == next_del == EXT_MAX_BLOCKS */ 2077 /*
2077 if (next == next_del) { 2078 * This is possible iff next == next_del == EXT_MAX_BLOCKS.
2079 * we need to check next == EXT_MAX_BLOCKS because it is
2080 * possible that an extent is with unwritten and delayed
2081 * status due to when an extent is delayed allocated and
2082 * is allocated by fallocate status tree will track both of
2083 * them in a extent.
2084 *
2085 * So we could return a unwritten and delayed extent, and
2086 * its block is equal to 'next'.
2087 */
2088 if (next == next_del && next == EXT_MAX_BLOCKS) {
2078 flags |= FIEMAP_EXTENT_LAST; 2089 flags |= FIEMAP_EXTENT_LAST;
2079 if (unlikely(next_del != EXT_MAX_BLOCKS || 2090 if (unlikely(next_del != EXT_MAX_BLOCKS ||
2080 next != EXT_MAX_BLOCKS)) { 2091 next != EXT_MAX_BLOCKS)) {
@@ -2089,9 +2100,9 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2089 2100
2090 if (exists) { 2101 if (exists) {
2091 err = fiemap_fill_next_extent(fieinfo, 2102 err = fiemap_fill_next_extent(fieinfo,
2092 (__u64)newex.ec_block << blksize_bits, 2103 (__u64)es.es_lblk << blksize_bits,
2093 (__u64)newex.ec_start << blksize_bits, 2104 (__u64)es.es_pblk << blksize_bits,
2094 (__u64)newex.ec_len << blksize_bits, 2105 (__u64)es.es_len << blksize_bits,
2095 flags); 2106 flags);
2096 if (err < 0) 2107 if (err < 0)
2097 break; 2108 break;
@@ -2101,7 +2112,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2101 } 2112 }
2102 } 2113 }
2103 2114
2104 block = newex.ec_block + newex.ec_len; 2115 block = es.es_lblk + es.es_len;
2105 } 2116 }
2106 2117
2107 if (path) { 2118 if (path) {
@@ -2112,21 +2123,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2112 return err; 2123 return err;
2113} 2124}
2114 2125
2115static void
2116ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
2117 __u32 len, ext4_fsblk_t start)
2118{
2119 struct ext4_ext_cache *cex;
2120 BUG_ON(len == 0);
2121 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2122 trace_ext4_ext_put_in_cache(inode, block, len, start);
2123 cex = &EXT4_I(inode)->i_cached_extent;
2124 cex->ec_block = block;
2125 cex->ec_len = len;
2126 cex->ec_start = start;
2127 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2128}
2129
2130/* 2126/*
2131 * ext4_ext_put_gap_in_cache: 2127 * ext4_ext_put_gap_in_cache:
2132 * calculate boundaries of the gap that the requested block fits into 2128 * calculate boundaries of the gap that the requested block fits into
@@ -2143,9 +2139,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2143 2139
2144 ex = path[depth].p_ext; 2140 ex = path[depth].p_ext;
2145 if (ex == NULL) { 2141 if (ex == NULL) {
2146 /* there is no extent yet, so gap is [0;-] */ 2142 /*
2147 lblock = 0; 2143 * there is no extent yet, so gap is [0;-] and we
2148 len = EXT_MAX_BLOCKS; 2144 * don't cache it
2145 */
2149 ext_debug("cache gap(whole file):"); 2146 ext_debug("cache gap(whole file):");
2150 } else if (block < le32_to_cpu(ex->ee_block)) { 2147 } else if (block < le32_to_cpu(ex->ee_block)) {
2151 lblock = block; 2148 lblock = block;
@@ -2154,6 +2151,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2154 block, 2151 block,
2155 le32_to_cpu(ex->ee_block), 2152 le32_to_cpu(ex->ee_block),
2156 ext4_ext_get_actual_len(ex)); 2153 ext4_ext_get_actual_len(ex));
2154 if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
2155 ext4_es_insert_extent(inode, lblock, len, ~0,
2156 EXTENT_STATUS_HOLE);
2157 } else if (block >= le32_to_cpu(ex->ee_block) 2157 } else if (block >= le32_to_cpu(ex->ee_block)
2158 + ext4_ext_get_actual_len(ex)) { 2158 + ext4_ext_get_actual_len(ex)) {
2159 ext4_lblk_t next; 2159 ext4_lblk_t next;
@@ -2167,58 +2167,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2167 block); 2167 block);
2168 BUG_ON(next == lblock); 2168 BUG_ON(next == lblock);
2169 len = next - lblock; 2169 len = next - lblock;
2170 if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
2171 ext4_es_insert_extent(inode, lblock, len, ~0,
2172 EXTENT_STATUS_HOLE);
2170 } else { 2173 } else {
2171 lblock = len = 0; 2174 lblock = len = 0;
2172 BUG(); 2175 BUG();
2173 } 2176 }
2174 2177
2175 ext_debug(" -> %u:%lu\n", lblock, len); 2178 ext_debug(" -> %u:%lu\n", lblock, len);
2176 ext4_ext_put_in_cache(inode, lblock, len, 0);
2177}
2178
2179/*
2180 * ext4_ext_in_cache()
2181 * Checks to see if the given block is in the cache.
2182 * If it is, the cached extent is stored in the given
2183 * cache extent pointer.
2184 *
2185 * @inode: The files inode
2186 * @block: The block to look for in the cache
2187 * @ex: Pointer where the cached extent will be stored
2188 * if it contains block
2189 *
2190 * Return 0 if cache is invalid; 1 if the cache is valid
2191 */
2192static int
2193ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2194 struct ext4_extent *ex)
2195{
2196 struct ext4_ext_cache *cex;
2197 int ret = 0;
2198
2199 /*
2200 * We borrow i_block_reservation_lock to protect i_cached_extent
2201 */
2202 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2203 cex = &EXT4_I(inode)->i_cached_extent;
2204
2205 /* has cache valid data? */
2206 if (cex->ec_len == 0)
2207 goto errout;
2208
2209 if (in_range(block, cex->ec_block, cex->ec_len)) {
2210 ex->ee_block = cpu_to_le32(cex->ec_block);
2211 ext4_ext_store_pblock(ex, cex->ec_start);
2212 ex->ee_len = cpu_to_le16(cex->ec_len);
2213 ext_debug("%u cached by %u:%u:%llu\n",
2214 block,
2215 cex->ec_block, cex->ec_len, cex->ec_start);
2216 ret = 1;
2217 }
2218errout:
2219 trace_ext4_ext_in_cache(inode, block, ret);
2220 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2221 return ret;
2222} 2179}
2223 2180
2224/* 2181/*
@@ -2653,13 +2610,11 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2653 ext_debug("truncate since %u to %u\n", start, end); 2610 ext_debug("truncate since %u to %u\n", start, end);
2654 2611
2655 /* probably first extent we're gonna free will be last in block */ 2612 /* probably first extent we're gonna free will be last in block */
2656 handle = ext4_journal_start(inode, depth + 1); 2613 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
2657 if (IS_ERR(handle)) 2614 if (IS_ERR(handle))
2658 return PTR_ERR(handle); 2615 return PTR_ERR(handle);
2659 2616
2660again: 2617again:
2661 ext4_ext_invalidate_cache(inode);
2662
2663 trace_ext4_ext_remove_space(inode, start, depth); 2618 trace_ext4_ext_remove_space(inode, start, depth);
2664 2619
2665 /* 2620 /*
@@ -3519,19 +3474,19 @@ out:
3519 * 3474 *
3520 * Return 1 if there is a delalloc block in the range, otherwise 0. 3475 * Return 1 if there is a delalloc block in the range, otherwise 0.
3521 */ 3476 */
3522static int ext4_find_delalloc_range(struct inode *inode, 3477int ext4_find_delalloc_range(struct inode *inode,
3523 ext4_lblk_t lblk_start, 3478 ext4_lblk_t lblk_start,
3524 ext4_lblk_t lblk_end) 3479 ext4_lblk_t lblk_end)
3525{ 3480{
3526 struct extent_status es; 3481 struct extent_status es;
3527 3482
3528 es.start = lblk_start; 3483 ext4_es_find_delayed_extent(inode, lblk_start, &es);
3529 ext4_es_find_extent(inode, &es); 3484 if (es.es_len == 0)
3530 if (es.len == 0)
3531 return 0; /* there is no delay extent in this tree */ 3485 return 0; /* there is no delay extent in this tree */
3532 else if (es.start <= lblk_start && lblk_start < es.start + es.len) 3486 else if (es.es_lblk <= lblk_start &&
3487 lblk_start < es.es_lblk + es.es_len)
3533 return 1; 3488 return 1;
3534 else if (lblk_start <= es.start && es.start <= lblk_end) 3489 else if (lblk_start <= es.es_lblk && es.es_lblk <= lblk_end)
3535 return 1; 3490 return 1;
3536 else 3491 else
3537 return 0; 3492 return 0;
@@ -3656,6 +3611,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3656 ext4_set_io_unwritten_flag(inode, io); 3611 ext4_set_io_unwritten_flag(inode, io);
3657 else 3612 else
3658 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3613 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3614 map->m_flags |= EXT4_MAP_UNWRITTEN;
3659 if (ext4_should_dioread_nolock(inode)) 3615 if (ext4_should_dioread_nolock(inode))
3660 map->m_flags |= EXT4_MAP_UNINIT; 3616 map->m_flags |= EXT4_MAP_UNINIT;
3661 goto out; 3617 goto out;
@@ -3677,8 +3633,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3677 * repeat fallocate creation request 3633 * repeat fallocate creation request
3678 * we already have an unwritten extent 3634 * we already have an unwritten extent
3679 */ 3635 */
3680 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) 3636 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) {
3637 map->m_flags |= EXT4_MAP_UNWRITTEN;
3681 goto map_out; 3638 goto map_out;
3639 }
3682 3640
3683 /* buffered READ or buffered write_begin() lookup */ 3641 /* buffered READ or buffered write_begin() lookup */
3684 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3642 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -3898,35 +3856,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3898 map->m_lblk, map->m_len, inode->i_ino); 3856 map->m_lblk, map->m_len, inode->i_ino);
3899 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 3857 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3900 3858
3901 /* check in cache */
3902 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3903 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3904 if ((sbi->s_cluster_ratio > 1) &&
3905 ext4_find_delalloc_cluster(inode, map->m_lblk))
3906 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3907
3908 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3909 /*
3910 * block isn't allocated yet and
3911 * user doesn't want to allocate it
3912 */
3913 goto out2;
3914 }
3915 /* we should allocate requested block */
3916 } else {
3917 /* block is already allocated */
3918 if (sbi->s_cluster_ratio > 1)
3919 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3920 newblock = map->m_lblk
3921 - le32_to_cpu(newex.ee_block)
3922 + ext4_ext_pblock(&newex);
3923 /* number of remaining blocks in the extent */
3924 allocated = ext4_ext_get_actual_len(&newex) -
3925 (map->m_lblk - le32_to_cpu(newex.ee_block));
3926 goto out;
3927 }
3928 }
3929
3930 /* find extent for this block */ 3859 /* find extent for this block */
3931 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 3860 path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
3932 if (IS_ERR(path)) { 3861 if (IS_ERR(path)) {
@@ -3973,15 +3902,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3973 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3902 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
3974 ee_block, ee_len, newblock); 3903 ee_block, ee_len, newblock);
3975 3904
3976 /* 3905 if (!ext4_ext_is_uninitialized(ex))
3977 * Do not put uninitialized extent
3978 * in the cache
3979 */
3980 if (!ext4_ext_is_uninitialized(ex)) {
3981 ext4_ext_put_in_cache(inode, ee_block,
3982 ee_len, ee_start);
3983 goto out; 3906 goto out;
3984 } 3907
3985 allocated = ext4_ext_handle_uninitialized_extents( 3908 allocated = ext4_ext_handle_uninitialized_extents(
3986 handle, inode, map, path, flags, 3909 handle, inode, map, path, flags,
3987 allocated, newblock); 3910 allocated, newblock);
@@ -4002,7 +3925,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4002 * put just found gap into cache to speed up 3925 * put just found gap into cache to speed up
4003 * subsequent requests 3926 * subsequent requests
4004 */ 3927 */
4005 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3928 if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
3929 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
4006 goto out2; 3930 goto out2;
4007 } 3931 }
4008 3932
@@ -4108,6 +4032,7 @@ got_allocated_blocks:
4108 /* Mark uninitialized */ 4032 /* Mark uninitialized */
4109 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 4033 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
4110 ext4_ext_mark_uninitialized(&newex); 4034 ext4_ext_mark_uninitialized(&newex);
4035 map->m_flags |= EXT4_MAP_UNWRITTEN;
4111 /* 4036 /*
4112 * io_end structure was created for every IO write to an 4037 * io_end structure was created for every IO write to an
4113 * uninitialized extent. To avoid unnecessary conversion, 4038 * uninitialized extent. To avoid unnecessary conversion,
@@ -4241,10 +4166,9 @@ got_allocated_blocks:
4241 * Cache the extent and update transaction to commit on fdatasync only 4166 * Cache the extent and update transaction to commit on fdatasync only
4242 * when it is _not_ an uninitialized extent. 4167 * when it is _not_ an uninitialized extent.
4243 */ 4168 */
4244 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 4169 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
4245 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
4246 ext4_update_inode_fsync_trans(handle, inode, 1); 4170 ext4_update_inode_fsync_trans(handle, inode, 1);
4247 } else 4171 else
4248 ext4_update_inode_fsync_trans(handle, inode, 0); 4172 ext4_update_inode_fsync_trans(handle, inode, 0);
4249out: 4173out:
4250 if (allocated > map->m_len) 4174 if (allocated > map->m_len)
@@ -4284,7 +4208,7 @@ void ext4_ext_truncate(struct inode *inode)
4284 * probably first extent we're gonna free will be last in block 4208 * probably first extent we're gonna free will be last in block
4285 */ 4209 */
4286 err = ext4_writepage_trans_blocks(inode); 4210 err = ext4_writepage_trans_blocks(inode);
4287 handle = ext4_journal_start(inode, err); 4211 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, err);
4288 if (IS_ERR(handle)) 4212 if (IS_ERR(handle))
4289 return; 4213 return;
4290 4214
@@ -4303,7 +4227,6 @@ void ext4_ext_truncate(struct inode *inode)
4303 goto out_stop; 4227 goto out_stop;
4304 4228
4305 down_write(&EXT4_I(inode)->i_data_sem); 4229 down_write(&EXT4_I(inode)->i_data_sem);
4306 ext4_ext_invalidate_cache(inode);
4307 4230
4308 ext4_discard_preallocations(inode); 4231 ext4_discard_preallocations(inode);
4309 4232
@@ -4386,7 +4309,7 @@ static void ext4_falloc_update_inode(struct inode *inode,
4386 */ 4309 */
4387long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 4310long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4388{ 4311{
4389 struct inode *inode = file->f_path.dentry->d_inode; 4312 struct inode *inode = file_inode(file);
4390 handle_t *handle; 4313 handle_t *handle;
4391 loff_t new_size; 4314 loff_t new_size;
4392 unsigned int max_blocks; 4315 unsigned int max_blocks;
@@ -4397,13 +4320,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4397 struct ext4_map_blocks map; 4320 struct ext4_map_blocks map;
4398 unsigned int credits, blkbits = inode->i_blkbits; 4321 unsigned int credits, blkbits = inode->i_blkbits;
4399 4322
4400 /*
4401 * currently supporting (pre)allocate mode for extent-based
4402 * files _only_
4403 */
4404 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4405 return -EOPNOTSUPP;
4406
4407 /* Return error if mode is not supported */ 4323 /* Return error if mode is not supported */
4408 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 4324 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
4409 return -EOPNOTSUPP; 4325 return -EOPNOTSUPP;
@@ -4415,6 +4331,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4415 if (ret) 4331 if (ret)
4416 return ret; 4332 return ret;
4417 4333
4334 /*
4335 * currently supporting (pre)allocate mode for extent-based
4336 * files _only_
4337 */
4338 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4339 return -EOPNOTSUPP;
4340
4418 trace_ext4_fallocate_enter(inode, offset, len, mode); 4341 trace_ext4_fallocate_enter(inode, offset, len, mode);
4419 map.m_lblk = offset >> blkbits; 4342 map.m_lblk = offset >> blkbits;
4420 /* 4343 /*
@@ -4451,7 +4374,8 @@ retry:
4451 while (ret >= 0 && ret < max_blocks) { 4374 while (ret >= 0 && ret < max_blocks) {
4452 map.m_lblk = map.m_lblk + ret; 4375 map.m_lblk = map.m_lblk + ret;
4453 map.m_len = max_blocks = max_blocks - ret; 4376 map.m_len = max_blocks = max_blocks - ret;
4454 handle = ext4_journal_start(inode, credits); 4377 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4378 credits);
4455 if (IS_ERR(handle)) { 4379 if (IS_ERR(handle)) {
4456 ret = PTR_ERR(handle); 4380 ret = PTR_ERR(handle);
4457 break; 4381 break;
@@ -4459,11 +4383,11 @@ retry:
4459 ret = ext4_map_blocks(handle, inode, &map, flags); 4383 ret = ext4_map_blocks(handle, inode, &map, flags);
4460 if (ret <= 0) { 4384 if (ret <= 0) {
4461#ifdef EXT4FS_DEBUG 4385#ifdef EXT4FS_DEBUG
4462 WARN_ON(ret <= 0); 4386 ext4_warning(inode->i_sb,
4463 printk(KERN_ERR "%s: ext4_ext_map_blocks " 4387 "inode #%lu: block %u: len %u: "
4464 "returned error inode#%lu, block=%u, " 4388 "ext4_ext_map_blocks returned %d",
4465 "max_blocks=%u", __func__, 4389 inode->i_ino, map.m_lblk,
4466 inode->i_ino, map.m_lblk, max_blocks); 4390 map.m_len, ret);
4467#endif 4391#endif
4468 ext4_mark_inode_dirty(handle, inode); 4392 ext4_mark_inode_dirty(handle, inode);
4469 ret2 = ext4_journal_stop(handle); 4393 ret2 = ext4_journal_stop(handle);
@@ -4529,21 +4453,19 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4529 while (ret >= 0 && ret < max_blocks) { 4453 while (ret >= 0 && ret < max_blocks) {
4530 map.m_lblk += ret; 4454 map.m_lblk += ret;
4531 map.m_len = (max_blocks -= ret); 4455 map.m_len = (max_blocks -= ret);
4532 handle = ext4_journal_start(inode, credits); 4456 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
4533 if (IS_ERR(handle)) { 4457 if (IS_ERR(handle)) {
4534 ret = PTR_ERR(handle); 4458 ret = PTR_ERR(handle);
4535 break; 4459 break;
4536 } 4460 }
4537 ret = ext4_map_blocks(handle, inode, &map, 4461 ret = ext4_map_blocks(handle, inode, &map,
4538 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 4462 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4539 if (ret <= 0) { 4463 if (ret <= 0)
4540 WARN_ON(ret <= 0); 4464 ext4_warning(inode->i_sb,
4541 ext4_msg(inode->i_sb, KERN_ERR, 4465 "inode #%lu: block %u: len %u: "
4542 "%s:%d: inode #%lu: block %u: len %u: " 4466 "ext4_ext_map_blocks returned %d",
4543 "ext4_ext_map_blocks returned %d", 4467 inode->i_ino, map.m_lblk,
4544 __func__, __LINE__, inode->i_ino, map.m_lblk, 4468 map.m_len, ret);
4545 map.m_len, ret);
4546 }
4547 ext4_mark_inode_dirty(handle, inode); 4469 ext4_mark_inode_dirty(handle, inode);
4548 ret2 = ext4_journal_stop(handle); 4470 ret2 = ext4_journal_stop(handle);
4549 if (ret <= 0 || ret2 ) 4471 if (ret <= 0 || ret2 )
@@ -4553,42 +4475,48 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4553} 4475}
4554 4476
4555/* 4477/*
4556 * If newex is not existing extent (newex->ec_start equals zero) find 4478 * If newes is not existing extent (newes->ec_pblk equals zero) find
4557 * delayed extent at start of newex and update newex accordingly and 4479 * delayed extent at start of newes and update newes accordingly and
4558 * return start of the next delayed extent. 4480 * return start of the next delayed extent.
4559 * 4481 *
4560 * If newex is existing extent (newex->ec_start is not equal zero) 4482 * If newes is existing extent (newes->ec_pblk is not equal zero)
4561 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed 4483 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
4562 * extent found. Leave newex unmodified. 4484 * extent found. Leave newes unmodified.
4563 */ 4485 */
4564static int ext4_find_delayed_extent(struct inode *inode, 4486static int ext4_find_delayed_extent(struct inode *inode,
4565 struct ext4_ext_cache *newex) 4487 struct extent_status *newes)
4566{ 4488{
4567 struct extent_status es; 4489 struct extent_status es;
4568 ext4_lblk_t next_del; 4490 ext4_lblk_t block, next_del;
4569 4491
4570 es.start = newex->ec_block; 4492 ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
4571 next_del = ext4_es_find_extent(inode, &es);
4572 4493
4573 if (newex->ec_start == 0) { 4494 if (newes->es_pblk == 0) {
4574 /* 4495 /*
4575 * No extent in extent-tree contains block @newex->ec_start, 4496 * No extent in extent-tree contains block @newes->es_pblk,
4576 * then the block may stay in 1)a hole or 2)delayed-extent. 4497 * then the block may stay in 1)a hole or 2)delayed-extent.
4577 */ 4498 */
4578 if (es.len == 0) 4499 if (es.es_len == 0)
4579 /* A hole found. */ 4500 /* A hole found. */
4580 return 0; 4501 return 0;
4581 4502
4582 if (es.start > newex->ec_block) { 4503 if (es.es_lblk > newes->es_lblk) {
4583 /* A hole found. */ 4504 /* A hole found. */
4584 newex->ec_len = min(es.start - newex->ec_block, 4505 newes->es_len = min(es.es_lblk - newes->es_lblk,
4585 newex->ec_len); 4506 newes->es_len);
4586 return 0; 4507 return 0;
4587 } 4508 }
4588 4509
4589 newex->ec_len = es.start + es.len - newex->ec_block; 4510 newes->es_len = es.es_lblk + es.es_len - newes->es_lblk;
4590 } 4511 }
4591 4512
4513 block = newes->es_lblk + newes->es_len;
4514 ext4_es_find_delayed_extent(inode, block, &es);
4515 if (es.es_len == 0)
4516 next_del = EXT_MAX_BLOCKS;
4517 else
4518 next_del = es.es_lblk;
4519
4592 return next_del; 4520 return next_del;
4593} 4521}
4594/* fiemap flags we can handle specified here */ 4522/* fiemap flags we can handle specified here */
@@ -4643,7 +4571,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
4643 */ 4571 */
4644int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) 4572int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4645{ 4573{
4646 struct inode *inode = file->f_path.dentry->d_inode; 4574 struct inode *inode = file_inode(file);
4647 struct super_block *sb = inode->i_sb; 4575 struct super_block *sb = inode->i_sb;
4648 ext4_lblk_t first_block, stop_block; 4576 ext4_lblk_t first_block, stop_block;
4649 struct address_space *mapping = inode->i_mapping; 4577 struct address_space *mapping = inode->i_mapping;
@@ -4709,7 +4637,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4709 inode_dio_wait(inode); 4637 inode_dio_wait(inode);
4710 4638
4711 credits = ext4_writepage_trans_blocks(inode); 4639 credits = ext4_writepage_trans_blocks(inode);
4712 handle = ext4_journal_start(inode, credits); 4640 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
4713 if (IS_ERR(handle)) { 4641 if (IS_ERR(handle)) {
4714 err = PTR_ERR(handle); 4642 err = PTR_ERR(handle);
4715 goto out_dio; 4643 goto out_dio;
@@ -4786,14 +4714,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4786 goto out; 4714 goto out;
4787 4715
4788 down_write(&EXT4_I(inode)->i_data_sem); 4716 down_write(&EXT4_I(inode)->i_data_sem);
4789 ext4_ext_invalidate_cache(inode);
4790 ext4_discard_preallocations(inode); 4717 ext4_discard_preallocations(inode);
4791 4718
4792 err = ext4_es_remove_extent(inode, first_block, 4719 err = ext4_es_remove_extent(inode, first_block,
4793 stop_block - first_block); 4720 stop_block - first_block);
4794 err = ext4_ext_remove_space(inode, first_block, stop_block - 1); 4721 err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
4795 4722
4796 ext4_ext_invalidate_cache(inode);
4797 ext4_discard_preallocations(inode); 4723 ext4_discard_preallocations(inode);
4798 4724
4799 if (IS_SYNC(inode)) 4725 if (IS_SYNC(inode))
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 564d981a2fcc..f768f4a98a2b 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -23,40 +23,53 @@
23 * (e.g. Reservation space warning), and provide extent-level locking. 23 * (e.g. Reservation space warning), and provide extent-level locking.
24 * Delay extent tree is the first step to achieve this goal. It is 24 * Delay extent tree is the first step to achieve this goal. It is
25 * original built by Yongqiang Yang. At that time it is called delay 25 * original built by Yongqiang Yang. At that time it is called delay
26 * extent tree, whose goal is only track delay extent in memory to 26 * extent tree, whose goal is only track delayed extents in memory to
27 * simplify the implementation of fiemap and bigalloc, and introduce 27 * simplify the implementation of fiemap and bigalloc, and introduce
28 * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called 28 * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called
29 * delay extent tree at the following comment. But for better 29 * delay extent tree at the first commit. But for better understand
30 * understand what it does, it has been rename to extent status tree. 30 * what it does, it has been rename to extent status tree.
31 * 31 *
32 * Currently the first step has been done. All delay extents are 32 * Step1:
33 * tracked in the tree. It maintains the delay extent when a delay 33 * Currently the first step has been done. All delayed extents are
34 * allocation is issued, and the delay extent is written out or 34 * tracked in the tree. It maintains the delayed extent when a delayed
35 * allocation is issued, and the delayed extent is written out or
35 * invalidated. Therefore the implementation of fiemap and bigalloc 36 * invalidated. Therefore the implementation of fiemap and bigalloc
36 * are simplified, and SEEK_DATA/SEEK_HOLE are introduced. 37 * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
37 * 38 *
38 * The following comment describes the implemenmtation of extent 39 * The following comment describes the implemenmtation of extent
39 * status tree and future works. 40 * status tree and future works.
41 *
42 * Step2:
43 * In this step all extent status are tracked by extent status tree.
44 * Thus, we can first try to lookup a block mapping in this tree before
45 * finding it in extent tree. Hence, single extent cache can be removed
46 * because extent status tree can do a better job. Extents in status
47 * tree are loaded on-demand. Therefore, the extent status tree may not
48 * contain all of the extents in a file. Meanwhile we define a shrinker
49 * to reclaim memory from extent status tree because fragmented extent
50 * tree will make status tree cost too much memory. written/unwritten/-
51 * hole extents in the tree will be reclaimed by this shrinker when we
52 * are under high memory pressure. Delayed extents will not be
53 * reclimed because fiemap, bigalloc, and seek_data/hole need it.
40 */ 54 */
41 55
42/* 56/*
43 * extents status tree implementation for ext4. 57 * Extent status tree implementation for ext4.
44 * 58 *
45 * 59 *
46 * ========================================================================== 60 * ==========================================================================
47 * Extents status encompass delayed extents and extent locks 61 * Extent status tree tracks all extent status.
48 * 62 *
49 * 1. Why delayed extent implementation ? 63 * 1. Why we need to implement extent status tree?
50 * 64 *
51 * Without delayed extent, ext4 identifies a delayed extent by looking 65 * Without extent status tree, ext4 identifies a delayed extent by looking
52 * up page cache, this has several deficiencies - complicated, buggy, 66 * up page cache, this has several deficiencies - complicated, buggy,
53 * and inefficient code. 67 * and inefficient code.
54 * 68 *
55 * FIEMAP, SEEK_HOLE/DATA, bigalloc, punch hole and writeout all need 69 * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a
56 * to know if a block or a range of blocks are belonged to a delayed 70 * block or a range of blocks are belonged to a delayed extent.
57 * extent.
58 * 71 *
59 * Let us have a look at how they do without delayed extents implementation. 72 * Let us have a look at how they do without extent status tree.
60 * -- FIEMAP 73 * -- FIEMAP
61 * FIEMAP looks up page cache to identify delayed allocations from holes. 74 * FIEMAP looks up page cache to identify delayed allocations from holes.
62 * 75 *
@@ -68,47 +81,48 @@
68 * already under delayed allocation or not to determine whether 81 * already under delayed allocation or not to determine whether
69 * quota reserving is needed for the cluster. 82 * quota reserving is needed for the cluster.
70 * 83 *
71 * -- punch hole
72 * punch hole looks up page cache to identify a delayed extent.
73 *
74 * -- writeout 84 * -- writeout
75 * Writeout looks up whole page cache to see if a buffer is 85 * Writeout looks up whole page cache to see if a buffer is
76 * mapped, If there are not very many delayed buffers, then it is 86 * mapped, If there are not very many delayed buffers, then it is
 77 * time consuming. 87 * time consuming.
78 * 88 *
79 * With delayed extents implementation, FIEMAP, SEEK_HOLE/DATA, 89 * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
80 * bigalloc and writeout can figure out if a block or a range of 90 * bigalloc and writeout can figure out if a block or a range of
81 * blocks is under delayed allocation(belonged to a delayed extent) or 91 * blocks is under delayed allocation(belonged to a delayed extent) or
82 * not by searching the delayed extent tree. 92 * not by searching the extent tree.
83 * 93 *
84 * 94 *
85 * ========================================================================== 95 * ==========================================================================
86 * 2. ext4 delayed extents impelmentation 96 * 2. Ext4 extent status tree impelmentation
97 *
98 * -- extent
99 * A extent is a range of blocks which are contiguous logically and
100 * physically. Unlike extent in extent tree, this extent in ext4 is
101 * a in-memory struct, there is no corresponding on-disk data. There
102 * is no limit on length of extent, so an extent can contain as many
103 * blocks as they are contiguous logically and physically.
87 * 104 *
88 * -- delayed extent 105 * -- extent status tree
89 * A delayed extent is a range of blocks which are contiguous 106 * Every inode has an extent status tree and all allocation blocks
90 * logically and under delayed allocation. Unlike extent in 107 * are added to the tree with different status. The extent in the
91 * ext4, delayed extent in ext4 is a in-memory struct, there is 108 * tree are ordered by logical block no.
92 * no corresponding on-disk data. There is no limit on length of
93 * delayed extent, so a delayed extent can contain as many blocks
94 * as they are contiguous logically.
95 * 109 *
96 * -- delayed extent tree 110 * -- operations on a extent status tree
97 * Every inode has a delayed extent tree and all under delayed 111 * There are three important operations on a delayed extent tree: find
98 * allocation blocks are added to the tree as delayed extents. 112 * next extent, adding a extent(a range of blocks) and removing a extent.
99 * Delayed extents in the tree are ordered by logical block no.
100 * 113 *
101 * -- operations on a delayed extent tree 114 * -- race on a extent status tree
102 * There are three operations on a delayed extent tree: find next 115 * Extent status tree is protected by inode->i_es_lock.
103 * delayed extent, adding a space(a range of blocks) and removing
104 * a space.
105 * 116 *
106 * -- race on a delayed extent tree 117 * -- memory consumption
107 * Delayed extent tree is protected inode->i_es_lock. 118 * Fragmented extent tree will make extent status tree cost too much
119 * memory. Hence, we will reclaim written/unwritten/hole extents from
120 * the tree under a heavy memory pressure.
108 * 121 *
109 * 122 *
110 * ========================================================================== 123 * ==========================================================================
111 * 3. performance analysis 124 * 3. Performance analysis
125 *
112 * -- overhead 126 * -- overhead
113 * 1. There is a cache extent for write access, so if writes are 127 * 1. There is a cache extent for write access, so if writes are
 114 * not very random, adding space operations are in O(1) time. 128 * not very random, adding space operations are in O(1) time.
@@ -120,15 +134,21 @@
120 * 134 *
121 * ========================================================================== 135 * ==========================================================================
122 * 4. TODO list 136 * 4. TODO list
123 * -- Track all extent status
124 * 137 *
125 * -- Improve get block process 138 * -- Refactor delayed space reservation
126 * 139 *
127 * -- Extent-level locking 140 * -- Extent-level locking
128 */ 141 */
129 142
130static struct kmem_cache *ext4_es_cachep; 143static struct kmem_cache *ext4_es_cachep;
131 144
145static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
146static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
147 ext4_lblk_t end);
148static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
149 int nr_to_scan);
150static int ext4_es_reclaim_extents_count(struct super_block *sb);
151
132int __init ext4_init_es(void) 152int __init ext4_init_es(void)
133{ 153{
134 ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT); 154 ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
@@ -161,7 +181,9 @@ static void ext4_es_print_tree(struct inode *inode)
161 while (node) { 181 while (node) {
162 struct extent_status *es; 182 struct extent_status *es;
163 es = rb_entry(node, struct extent_status, rb_node); 183 es = rb_entry(node, struct extent_status, rb_node);
164 printk(KERN_DEBUG " [%u/%u)", es->start, es->len); 184 printk(KERN_DEBUG " [%u/%u) %llu %llx",
185 es->es_lblk, es->es_len,
186 ext4_es_pblock(es), ext4_es_status(es));
165 node = rb_next(node); 187 node = rb_next(node);
166 } 188 }
167 printk(KERN_DEBUG "\n"); 189 printk(KERN_DEBUG "\n");
@@ -170,10 +192,10 @@ static void ext4_es_print_tree(struct inode *inode)
170#define ext4_es_print_tree(inode) 192#define ext4_es_print_tree(inode)
171#endif 193#endif
172 194
173static inline ext4_lblk_t extent_status_end(struct extent_status *es) 195static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
174{ 196{
175 BUG_ON(es->start + es->len < es->start); 197 BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
176 return es->start + es->len - 1; 198 return es->es_lblk + es->es_len - 1;
177} 199}
178 200
179/* 201/*
@@ -181,25 +203,25 @@ static inline ext4_lblk_t extent_status_end(struct extent_status *es)
181 * it can't be found, try to find next extent. 203 * it can't be found, try to find next extent.
182 */ 204 */
183static struct extent_status *__es_tree_search(struct rb_root *root, 205static struct extent_status *__es_tree_search(struct rb_root *root,
184 ext4_lblk_t offset) 206 ext4_lblk_t lblk)
185{ 207{
186 struct rb_node *node = root->rb_node; 208 struct rb_node *node = root->rb_node;
187 struct extent_status *es = NULL; 209 struct extent_status *es = NULL;
188 210
189 while (node) { 211 while (node) {
190 es = rb_entry(node, struct extent_status, rb_node); 212 es = rb_entry(node, struct extent_status, rb_node);
191 if (offset < es->start) 213 if (lblk < es->es_lblk)
192 node = node->rb_left; 214 node = node->rb_left;
193 else if (offset > extent_status_end(es)) 215 else if (lblk > ext4_es_end(es))
194 node = node->rb_right; 216 node = node->rb_right;
195 else 217 else
196 return es; 218 return es;
197 } 219 }
198 220
199 if (es && offset < es->start) 221 if (es && lblk < es->es_lblk)
200 return es; 222 return es;
201 223
202 if (es && offset > extent_status_end(es)) { 224 if (es && lblk > ext4_es_end(es)) {
203 node = rb_next(&es->rb_node); 225 node = rb_next(&es->rb_node);
204 return node ? rb_entry(node, struct extent_status, rb_node) : 226 return node ? rb_entry(node, struct extent_status, rb_node) :
205 NULL; 227 NULL;
@@ -209,79 +231,121 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
209} 231}
210 232
211/* 233/*
212 * ext4_es_find_extent: find the 1st delayed extent covering @es->start 234 * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
213 * if it exists, otherwise, the next extent after @es->start. 235 * if it exists, otherwise, the next extent after @es->lblk.
214 * 236 *
215 * @inode: the inode which owns delayed extents 237 * @inode: the inode which owns delayed extents
238 * @lblk: the offset where we start to search
216 * @es: delayed extent that we found 239 * @es: delayed extent that we found
217 *
218 * Returns the first block of the next extent after es, otherwise
219 * EXT_MAX_BLOCKS if no delay extent is found.
220 * Delayed extent is returned via @es.
221 */ 240 */
222ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) 241void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
242 struct extent_status *es)
223{ 243{
224 struct ext4_es_tree *tree = NULL; 244 struct ext4_es_tree *tree = NULL;
225 struct extent_status *es1 = NULL; 245 struct extent_status *es1 = NULL;
226 struct rb_node *node; 246 struct rb_node *node;
227 ext4_lblk_t ret = EXT_MAX_BLOCKS;
228 247
229 trace_ext4_es_find_extent_enter(inode, es->start); 248 BUG_ON(es == NULL);
249 trace_ext4_es_find_delayed_extent_enter(inode, lblk);
230 250
231 read_lock(&EXT4_I(inode)->i_es_lock); 251 read_lock(&EXT4_I(inode)->i_es_lock);
232 tree = &EXT4_I(inode)->i_es_tree; 252 tree = &EXT4_I(inode)->i_es_tree;
233 253
234 /* find delay extent in cache firstly */ 254 /* find extent in cache firstly */
255 es->es_lblk = es->es_len = es->es_pblk = 0;
235 if (tree->cache_es) { 256 if (tree->cache_es) {
236 es1 = tree->cache_es; 257 es1 = tree->cache_es;
237 if (in_range(es->start, es1->start, es1->len)) { 258 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
238 es_debug("%u cached by [%u/%u)\n", 259 es_debug("%u cached by [%u/%u) %llu %llx\n",
239 es->start, es1->start, es1->len); 260 lblk, es1->es_lblk, es1->es_len,
261 ext4_es_pblock(es1), ext4_es_status(es1));
240 goto out; 262 goto out;
241 } 263 }
242 } 264 }
243 265
244 es->len = 0; 266 es1 = __es_tree_search(&tree->root, lblk);
245 es1 = __es_tree_search(&tree->root, es->start);
246 267
247out: 268out:
248 if (es1) { 269 if (es1 && !ext4_es_is_delayed(es1)) {
249 tree->cache_es = es1; 270 while ((node = rb_next(&es1->rb_node)) != NULL) {
250 es->start = es1->start;
251 es->len = es1->len;
252 node = rb_next(&es1->rb_node);
253 if (node) {
254 es1 = rb_entry(node, struct extent_status, rb_node); 271 es1 = rb_entry(node, struct extent_status, rb_node);
255 ret = es1->start; 272 if (ext4_es_is_delayed(es1))
273 break;
256 } 274 }
257 } 275 }
258 276
277 if (es1 && ext4_es_is_delayed(es1)) {
278 tree->cache_es = es1;
279 es->es_lblk = es1->es_lblk;
280 es->es_len = es1->es_len;
281 es->es_pblk = es1->es_pblk;
282 }
283
259 read_unlock(&EXT4_I(inode)->i_es_lock); 284 read_unlock(&EXT4_I(inode)->i_es_lock);
260 285
261 trace_ext4_es_find_extent_exit(inode, es, ret); 286 ext4_es_lru_add(inode);
262 return ret; 287 trace_ext4_es_find_delayed_extent_exit(inode, es);
263} 288}
264 289
265static struct extent_status * 290static struct extent_status *
266ext4_es_alloc_extent(ext4_lblk_t start, ext4_lblk_t len) 291ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
292 ext4_fsblk_t pblk)
267{ 293{
268 struct extent_status *es; 294 struct extent_status *es;
269 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); 295 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
270 if (es == NULL) 296 if (es == NULL)
271 return NULL; 297 return NULL;
272 es->start = start; 298 es->es_lblk = lblk;
273 es->len = len; 299 es->es_len = len;
300 es->es_pblk = pblk;
301
302 /*
303 * We don't count delayed extent because we never try to reclaim them
304 */
305 if (!ext4_es_is_delayed(es))
306 EXT4_I(inode)->i_es_lru_nr++;
307
274 return es; 308 return es;
275} 309}
276 310
277static void ext4_es_free_extent(struct extent_status *es) 311static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
278{ 312{
313 /* Decrease the lru counter when this es is not delayed */
314 if (!ext4_es_is_delayed(es)) {
315 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
316 EXT4_I(inode)->i_es_lru_nr--;
317 }
318
279 kmem_cache_free(ext4_es_cachep, es); 319 kmem_cache_free(ext4_es_cachep, es);
280} 320}
281 321
322/*
323 * Check whether or not two extents can be merged
324 * Condition:
325 * - logical block number is contiguous
326 * - physical block number is contiguous
327 * - status is equal
328 */
329static int ext4_es_can_be_merged(struct extent_status *es1,
330 struct extent_status *es2)
331{
332 if (es1->es_lblk + es1->es_len != es2->es_lblk)
333 return 0;
334
335 if (ext4_es_status(es1) != ext4_es_status(es2))
336 return 0;
337
338 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
339 (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
340 return 0;
341
342 return 1;
343}
344
282static struct extent_status * 345static struct extent_status *
283ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es) 346ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
284{ 347{
348 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
285 struct extent_status *es1; 349 struct extent_status *es1;
286 struct rb_node *node; 350 struct rb_node *node;
287 351
@@ -290,10 +354,10 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
290 return es; 354 return es;
291 355
292 es1 = rb_entry(node, struct extent_status, rb_node); 356 es1 = rb_entry(node, struct extent_status, rb_node);
293 if (es->start == extent_status_end(es1) + 1) { 357 if (ext4_es_can_be_merged(es1, es)) {
294 es1->len += es->len; 358 es1->es_len += es->es_len;
295 rb_erase(&es->rb_node, &tree->root); 359 rb_erase(&es->rb_node, &tree->root);
296 ext4_es_free_extent(es); 360 ext4_es_free_extent(inode, es);
297 es = es1; 361 es = es1;
298 } 362 }
299 363
@@ -301,8 +365,9 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
301} 365}
302 366
303static struct extent_status * 367static struct extent_status *
304ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es) 368ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
305{ 369{
370 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
306 struct extent_status *es1; 371 struct extent_status *es1;
307 struct rb_node *node; 372 struct rb_node *node;
308 373
@@ -311,69 +376,57 @@ ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es)
311 return es; 376 return es;
312 377
313 es1 = rb_entry(node, struct extent_status, rb_node); 378 es1 = rb_entry(node, struct extent_status, rb_node);
314 if (es1->start == extent_status_end(es) + 1) { 379 if (ext4_es_can_be_merged(es, es1)) {
315 es->len += es1->len; 380 es->es_len += es1->es_len;
316 rb_erase(node, &tree->root); 381 rb_erase(node, &tree->root);
317 ext4_es_free_extent(es1); 382 ext4_es_free_extent(inode, es1);
318 } 383 }
319 384
320 return es; 385 return es;
321} 386}
322 387
323static int __es_insert_extent(struct ext4_es_tree *tree, ext4_lblk_t offset, 388static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
324 ext4_lblk_t len)
325{ 389{
390 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
326 struct rb_node **p = &tree->root.rb_node; 391 struct rb_node **p = &tree->root.rb_node;
327 struct rb_node *parent = NULL; 392 struct rb_node *parent = NULL;
328 struct extent_status *es; 393 struct extent_status *es;
329 ext4_lblk_t end = offset + len - 1;
330
331 BUG_ON(end < offset);
332 es = tree->cache_es;
333 if (es && offset == (extent_status_end(es) + 1)) {
334 es_debug("cached by [%u/%u)\n", es->start, es->len);
335 es->len += len;
336 es = ext4_es_try_to_merge_right(tree, es);
337 goto out;
338 } else if (es && es->start == end + 1) {
339 es_debug("cached by [%u/%u)\n", es->start, es->len);
340 es->start = offset;
341 es->len += len;
342 es = ext4_es_try_to_merge_left(tree, es);
343 goto out;
344 } else if (es && es->start <= offset &&
345 end <= extent_status_end(es)) {
346 es_debug("cached by [%u/%u)\n", es->start, es->len);
347 goto out;
348 }
349 394
350 while (*p) { 395 while (*p) {
351 parent = *p; 396 parent = *p;
352 es = rb_entry(parent, struct extent_status, rb_node); 397 es = rb_entry(parent, struct extent_status, rb_node);
353 398
354 if (offset < es->start) { 399 if (newes->es_lblk < es->es_lblk) {
355 if (es->start == end + 1) { 400 if (ext4_es_can_be_merged(newes, es)) {
356 es->start = offset; 401 /*
357 es->len += len; 402 * Here we can modify es_lblk directly
358 es = ext4_es_try_to_merge_left(tree, es); 403 * because it isn't overlapped.
404 */
405 es->es_lblk = newes->es_lblk;
406 es->es_len += newes->es_len;
407 if (ext4_es_is_written(es) ||
408 ext4_es_is_unwritten(es))
409 ext4_es_store_pblock(es,
410 newes->es_pblk);
411 es = ext4_es_try_to_merge_left(inode, es);
359 goto out; 412 goto out;
360 } 413 }
361 p = &(*p)->rb_left; 414 p = &(*p)->rb_left;
362 } else if (offset > extent_status_end(es)) { 415 } else if (newes->es_lblk > ext4_es_end(es)) {
363 if (offset == extent_status_end(es) + 1) { 416 if (ext4_es_can_be_merged(es, newes)) {
364 es->len += len; 417 es->es_len += newes->es_len;
365 es = ext4_es_try_to_merge_right(tree, es); 418 es = ext4_es_try_to_merge_right(inode, es);
366 goto out; 419 goto out;
367 } 420 }
368 p = &(*p)->rb_right; 421 p = &(*p)->rb_right;
369 } else { 422 } else {
370 if (extent_status_end(es) <= end) 423 BUG_ON(1);
371 es->len = offset - es->start + len; 424 return -EINVAL;
372 goto out;
373 } 425 }
374 } 426 }
375 427
376 es = ext4_es_alloc_extent(offset, len); 428 es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
429 newes->es_pblk);
377 if (!es) 430 if (!es)
378 return -ENOMEM; 431 return -ENOMEM;
379 rb_link_node(&es->rb_node, parent, p); 432 rb_link_node(&es->rb_node, parent, p);
@@ -385,85 +438,166 @@ out:
385} 438}
386 439
387/* 440/*
388 * ext4_es_insert_extent() adds a space to a delayed extent tree. 441 * ext4_es_insert_extent() adds a space to a extent status tree.
389 * Caller holds inode->i_es_lock.
390 * 442 *
391 * ext4_es_insert_extent is called by ext4_da_write_begin and 443 * ext4_es_insert_extent is called by ext4_da_write_begin and
392 * ext4_es_remove_extent. 444 * ext4_es_remove_extent.
393 * 445 *
394 * Return 0 on success, error code on failure. 446 * Return 0 on success, error code on failure.
395 */ 447 */
396int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t offset, 448int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
397 ext4_lblk_t len) 449 ext4_lblk_t len, ext4_fsblk_t pblk,
450 unsigned long long status)
398{ 451{
399 struct ext4_es_tree *tree; 452 struct extent_status newes;
453 ext4_lblk_t end = lblk + len - 1;
400 int err = 0; 454 int err = 0;
401 455
402 trace_ext4_es_insert_extent(inode, offset, len); 456 es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
403 es_debug("add [%u/%u) to extent status tree of inode %lu\n", 457 lblk, len, pblk, status, inode->i_ino);
404 offset, len, inode->i_ino); 458
459 if (!len)
460 return 0;
461
462 BUG_ON(end < lblk);
463
464 newes.es_lblk = lblk;
465 newes.es_len = len;
466 ext4_es_store_pblock(&newes, pblk);
467 ext4_es_store_status(&newes, status);
468 trace_ext4_es_insert_extent(inode, &newes);
405 469
406 write_lock(&EXT4_I(inode)->i_es_lock); 470 write_lock(&EXT4_I(inode)->i_es_lock);
407 tree = &EXT4_I(inode)->i_es_tree; 471 err = __es_remove_extent(inode, lblk, end);
408 err = __es_insert_extent(tree, offset, len); 472 if (err != 0)
473 goto error;
474 err = __es_insert_extent(inode, &newes);
475
476error:
409 write_unlock(&EXT4_I(inode)->i_es_lock); 477 write_unlock(&EXT4_I(inode)->i_es_lock);
410 478
479 ext4_es_lru_add(inode);
411 ext4_es_print_tree(inode); 480 ext4_es_print_tree(inode);
412 481
413 return err; 482 return err;
414} 483}
415 484
416/* 485/*
417 * ext4_es_remove_extent() removes a space from a delayed extent tree. 486 * ext4_es_lookup_extent() looks up an extent in extent status tree.
418 * Caller holds inode->i_es_lock.
419 * 487 *
420 * Return 0 on success, error code on failure. 488 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
489 *
490 * Return: 1 on found, 0 on not
421 */ 491 */
422int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset, 492int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
423 ext4_lblk_t len) 493 struct extent_status *es)
424{ 494{
425 struct rb_node *node;
426 struct ext4_es_tree *tree; 495 struct ext4_es_tree *tree;
496 struct extent_status *es1 = NULL;
497 struct rb_node *node;
498 int found = 0;
499
500 trace_ext4_es_lookup_extent_enter(inode, lblk);
501 es_debug("lookup extent in block %u\n", lblk);
502
503 tree = &EXT4_I(inode)->i_es_tree;
504 read_lock(&EXT4_I(inode)->i_es_lock);
505
506 /* find extent in cache firstly */
507 es->es_lblk = es->es_len = es->es_pblk = 0;
508 if (tree->cache_es) {
509 es1 = tree->cache_es;
510 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
511 es_debug("%u cached by [%u/%u)\n",
512 lblk, es1->es_lblk, es1->es_len);
513 found = 1;
514 goto out;
515 }
516 }
517
518 node = tree->root.rb_node;
519 while (node) {
520 es1 = rb_entry(node, struct extent_status, rb_node);
521 if (lblk < es1->es_lblk)
522 node = node->rb_left;
523 else if (lblk > ext4_es_end(es1))
524 node = node->rb_right;
525 else {
526 found = 1;
527 break;
528 }
529 }
530
531out:
532 if (found) {
533 BUG_ON(!es1);
534 es->es_lblk = es1->es_lblk;
535 es->es_len = es1->es_len;
536 es->es_pblk = es1->es_pblk;
537 }
538
539 read_unlock(&EXT4_I(inode)->i_es_lock);
540
541 ext4_es_lru_add(inode);
542 trace_ext4_es_lookup_extent_exit(inode, es, found);
543 return found;
544}
545
546static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
547 ext4_lblk_t end)
548{
549 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
550 struct rb_node *node;
427 struct extent_status *es; 551 struct extent_status *es;
428 struct extent_status orig_es; 552 struct extent_status orig_es;
429 ext4_lblk_t len1, len2, end; 553 ext4_lblk_t len1, len2;
554 ext4_fsblk_t block;
430 int err = 0; 555 int err = 0;
431 556
432 trace_ext4_es_remove_extent(inode, offset, len); 557 es = __es_tree_search(&tree->root, lblk);
433 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
434 offset, len, inode->i_ino);
435
436 end = offset + len - 1;
437 BUG_ON(end < offset);
438 write_lock(&EXT4_I(inode)->i_es_lock);
439 tree = &EXT4_I(inode)->i_es_tree;
440 es = __es_tree_search(&tree->root, offset);
441 if (!es) 558 if (!es)
442 goto out; 559 goto out;
443 if (es->start > end) 560 if (es->es_lblk > end)
444 goto out; 561 goto out;
445 562
446 /* Simply invalidate cache_es. */ 563 /* Simply invalidate cache_es. */
447 tree->cache_es = NULL; 564 tree->cache_es = NULL;
448 565
449 orig_es.start = es->start; 566 orig_es.es_lblk = es->es_lblk;
450 orig_es.len = es->len; 567 orig_es.es_len = es->es_len;
451 len1 = offset > es->start ? offset - es->start : 0; 568 orig_es.es_pblk = es->es_pblk;
452 len2 = extent_status_end(es) > end ? 569
453 extent_status_end(es) - end : 0; 570 len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
571 len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
454 if (len1 > 0) 572 if (len1 > 0)
455 es->len = len1; 573 es->es_len = len1;
456 if (len2 > 0) { 574 if (len2 > 0) {
457 if (len1 > 0) { 575 if (len1 > 0) {
458 err = __es_insert_extent(tree, end + 1, len2); 576 struct extent_status newes;
577
578 newes.es_lblk = end + 1;
579 newes.es_len = len2;
580 if (ext4_es_is_written(&orig_es) ||
581 ext4_es_is_unwritten(&orig_es)) {
582 block = ext4_es_pblock(&orig_es) +
583 orig_es.es_len - len2;
584 ext4_es_store_pblock(&newes, block);
585 }
586 ext4_es_store_status(&newes, ext4_es_status(&orig_es));
587 err = __es_insert_extent(inode, &newes);
459 if (err) { 588 if (err) {
460 es->start = orig_es.start; 589 es->es_lblk = orig_es.es_lblk;
461 es->len = orig_es.len; 590 es->es_len = orig_es.es_len;
462 goto out; 591 goto out;
463 } 592 }
464 } else { 593 } else {
465 es->start = end + 1; 594 es->es_lblk = end + 1;
466 es->len = len2; 595 es->es_len = len2;
596 if (ext4_es_is_written(es) ||
597 ext4_es_is_unwritten(es)) {
598 block = orig_es.es_pblk + orig_es.es_len - len2;
599 ext4_es_store_pblock(es, block);
600 }
467 } 601 }
468 goto out; 602 goto out;
469 } 603 }
@@ -476,10 +610,10 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
476 es = NULL; 610 es = NULL;
477 } 611 }
478 612
479 while (es && extent_status_end(es) <= end) { 613 while (es && ext4_es_end(es) <= end) {
480 node = rb_next(&es->rb_node); 614 node = rb_next(&es->rb_node);
481 rb_erase(&es->rb_node, &tree->root); 615 rb_erase(&es->rb_node, &tree->root);
482 ext4_es_free_extent(es); 616 ext4_es_free_extent(inode, es);
483 if (!node) { 617 if (!node) {
484 es = NULL; 618 es = NULL;
485 break; 619 break;
@@ -487,14 +621,183 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
487 es = rb_entry(node, struct extent_status, rb_node); 621 es = rb_entry(node, struct extent_status, rb_node);
488 } 622 }
489 623
490 if (es && es->start < end + 1) { 624 if (es && es->es_lblk < end + 1) {
491 len1 = extent_status_end(es) - end; 625 ext4_lblk_t orig_len = es->es_len;
492 es->start = end + 1; 626
493 es->len = len1; 627 len1 = ext4_es_end(es) - end;
628 es->es_lblk = end + 1;
629 es->es_len = len1;
630 if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
631 block = es->es_pblk + orig_len - len1;
632 ext4_es_store_pblock(es, block);
633 }
494 } 634 }
495 635
496out: 636out:
637 return err;
638}
639
640/*
641 * ext4_es_remove_extent() removes a space from a extent status tree.
642 *
643 * Return 0 on success, error code on failure.
644 */
645int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
646 ext4_lblk_t len)
647{
648 ext4_lblk_t end;
649 int err = 0;
650
651 trace_ext4_es_remove_extent(inode, lblk, len);
652 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
653 lblk, len, inode->i_ino);
654
655 if (!len)
656 return err;
657
658 end = lblk + len - 1;
659 BUG_ON(end < lblk);
660
661 write_lock(&EXT4_I(inode)->i_es_lock);
662 err = __es_remove_extent(inode, lblk, end);
497 write_unlock(&EXT4_I(inode)->i_es_lock); 663 write_unlock(&EXT4_I(inode)->i_es_lock);
498 ext4_es_print_tree(inode); 664 ext4_es_print_tree(inode);
499 return err; 665 return err;
500} 666}
667
668static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
669{
670 struct ext4_sb_info *sbi = container_of(shrink,
671 struct ext4_sb_info, s_es_shrinker);
672 struct ext4_inode_info *ei;
673 struct list_head *cur, *tmp, scanned;
674 int nr_to_scan = sc->nr_to_scan;
675 int ret, nr_shrunk = 0;
676
677 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
678
679 if (!nr_to_scan)
680 return ext4_es_reclaim_extents_count(sbi->s_sb);
681
682 INIT_LIST_HEAD(&scanned);
683
684 spin_lock(&sbi->s_es_lru_lock);
685 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
686 list_move_tail(cur, &scanned);
687
688 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
689
690 read_lock(&ei->i_es_lock);
691 if (ei->i_es_lru_nr == 0) {
692 read_unlock(&ei->i_es_lock);
693 continue;
694 }
695 read_unlock(&ei->i_es_lock);
696
697 write_lock(&ei->i_es_lock);
698 ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
699 write_unlock(&ei->i_es_lock);
700
701 nr_shrunk += ret;
702 nr_to_scan -= ret;
703 if (nr_to_scan == 0)
704 break;
705 }
706 list_splice_tail(&scanned, &sbi->s_es_lru);
707 spin_unlock(&sbi->s_es_lru_lock);
708 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
709
710 return ext4_es_reclaim_extents_count(sbi->s_sb);
711}
712
713void ext4_es_register_shrinker(struct super_block *sb)
714{
715 struct ext4_sb_info *sbi;
716
717 sbi = EXT4_SB(sb);
718 INIT_LIST_HEAD(&sbi->s_es_lru);
719 spin_lock_init(&sbi->s_es_lru_lock);
720 sbi->s_es_shrinker.shrink = ext4_es_shrink;
721 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
722 register_shrinker(&sbi->s_es_shrinker);
723}
724
725void ext4_es_unregister_shrinker(struct super_block *sb)
726{
727 unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
728}
729
730void ext4_es_lru_add(struct inode *inode)
731{
732 struct ext4_inode_info *ei = EXT4_I(inode);
733 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
734
735 spin_lock(&sbi->s_es_lru_lock);
736 if (list_empty(&ei->i_es_lru))
737 list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
738 else
739 list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
740 spin_unlock(&sbi->s_es_lru_lock);
741}
742
743void ext4_es_lru_del(struct inode *inode)
744{
745 struct ext4_inode_info *ei = EXT4_I(inode);
746 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
747
748 spin_lock(&sbi->s_es_lru_lock);
749 if (!list_empty(&ei->i_es_lru))
750 list_del_init(&ei->i_es_lru);
751 spin_unlock(&sbi->s_es_lru_lock);
752}
753
754static int ext4_es_reclaim_extents_count(struct super_block *sb)
755{
756 struct ext4_sb_info *sbi = EXT4_SB(sb);
757 struct ext4_inode_info *ei;
758 struct list_head *cur;
759 int nr_cached = 0;
760
761 spin_lock(&sbi->s_es_lru_lock);
762 list_for_each(cur, &sbi->s_es_lru) {
763 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
764 read_lock(&ei->i_es_lock);
765 nr_cached += ei->i_es_lru_nr;
766 read_unlock(&ei->i_es_lock);
767 }
768 spin_unlock(&sbi->s_es_lru_lock);
769 trace_ext4_es_reclaim_extents_count(sb, nr_cached);
770 return nr_cached;
771}
772
773static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
774 int nr_to_scan)
775{
776 struct inode *inode = &ei->vfs_inode;
777 struct ext4_es_tree *tree = &ei->i_es_tree;
778 struct rb_node *node;
779 struct extent_status *es;
780 int nr_shrunk = 0;
781
782 if (ei->i_es_lru_nr == 0)
783 return 0;
784
785 node = rb_first(&tree->root);
786 while (node != NULL) {
787 es = rb_entry(node, struct extent_status, rb_node);
788 node = rb_next(&es->rb_node);
789 /*
790 * We can't reclaim delayed extent from status tree because
791 * fiemap, bigallic, and seek_data/hole need to use it.
792 */
793 if (!ext4_es_is_delayed(es)) {
794 rb_erase(&es->rb_node, &tree->root);
795 ext4_es_free_extent(inode, es);
796 nr_shrunk++;
797 if (--nr_to_scan == 0)
798 break;
799 }
800 }
801 tree->cache_es = NULL;
802 return nr_shrunk;
803}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 077f82db092a..f190dfe969da 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -20,10 +20,24 @@
20#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 20#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
21#endif 21#endif
22 22
23/*
24 * These flags live in the high bits of extent_status.es_pblk
25 */
26#define EXTENT_STATUS_WRITTEN (1ULL << 63)
27#define EXTENT_STATUS_UNWRITTEN (1ULL << 62)
28#define EXTENT_STATUS_DELAYED (1ULL << 61)
29#define EXTENT_STATUS_HOLE (1ULL << 60)
30
31#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
32 EXTENT_STATUS_UNWRITTEN | \
33 EXTENT_STATUS_DELAYED | \
34 EXTENT_STATUS_HOLE)
35
23struct extent_status { 36struct extent_status {
24 struct rb_node rb_node; 37 struct rb_node rb_node;
25 ext4_lblk_t start; /* first block extent covers */ 38 ext4_lblk_t es_lblk; /* first logical block extent covers */
26 ext4_lblk_t len; /* length of extent in block */ 39 ext4_lblk_t es_len; /* length of extent in block */
40 ext4_fsblk_t es_pblk; /* first physical block */
27}; 41};
28 42
29struct ext4_es_tree { 43struct ext4_es_tree {
@@ -35,11 +49,69 @@ extern int __init ext4_init_es(void);
35extern void ext4_exit_es(void); 49extern void ext4_exit_es(void);
36extern void ext4_es_init_tree(struct ext4_es_tree *tree); 50extern void ext4_es_init_tree(struct ext4_es_tree *tree);
37 51
38extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t start, 52extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
53 ext4_lblk_t len, ext4_fsblk_t pblk,
54 unsigned long long status);
55extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
39 ext4_lblk_t len); 56 ext4_lblk_t len);
40extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t start, 57extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
41 ext4_lblk_t len); 58 struct extent_status *es);
42extern ext4_lblk_t ext4_es_find_extent(struct inode *inode, 59extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
43 struct extent_status *es); 60 struct extent_status *es);
61
62static inline int ext4_es_is_written(struct extent_status *es)
63{
64 return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0;
65}
66
67static inline int ext4_es_is_unwritten(struct extent_status *es)
68{
69 return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0;
70}
71
72static inline int ext4_es_is_delayed(struct extent_status *es)
73{
74 return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0;
75}
76
77static inline int ext4_es_is_hole(struct extent_status *es)
78{
79 return (es->es_pblk & EXTENT_STATUS_HOLE) != 0;
80}
81
82static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
83{
84 return (es->es_pblk & EXTENT_STATUS_FLAGS);
85}
86
87static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
88{
89 return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
90}
91
92static inline void ext4_es_store_pblock(struct extent_status *es,
93 ext4_fsblk_t pb)
94{
95 ext4_fsblk_t block;
96
97 block = (pb & ~EXTENT_STATUS_FLAGS) |
98 (es->es_pblk & EXTENT_STATUS_FLAGS);
99 es->es_pblk = block;
100}
101
102static inline void ext4_es_store_status(struct extent_status *es,
103 unsigned long long status)
104{
105 ext4_fsblk_t block;
106
107 block = (status & EXTENT_STATUS_FLAGS) |
108 (es->es_pblk & ~EXTENT_STATUS_FLAGS);
109 es->es_pblk = block;
110}
111
112extern void ext4_es_register_shrinker(struct super_block *sb);
113extern void ext4_es_unregister_shrinker(struct super_block *sb);
114extern void ext4_es_lru_add(struct inode *inode);
115extern void ext4_es_lru_del(struct inode *inode);
44 116
45#endif /* _EXT4_EXTENTS_STATUS_H */ 117#endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 405565a62277..64848b595b24 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -167,7 +167,7 @@ static ssize_t
167ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 167ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
168 unsigned long nr_segs, loff_t pos) 168 unsigned long nr_segs, loff_t pos)
169{ 169{
170 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 170 struct inode *inode = file_inode(iocb->ki_filp);
171 ssize_t ret; 171 ssize_t ret;
172 172
173 /* 173 /*
@@ -240,7 +240,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
240 handle_t *handle; 240 handle_t *handle;
241 int err; 241 int err;
242 242
243 handle = ext4_journal_start_sb(sb, 1); 243 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
244 if (IS_ERR(handle)) 244 if (IS_ERR(handle))
245 return PTR_ERR(handle); 245 return PTR_ERR(handle);
246 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 246 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
@@ -464,10 +464,8 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
464 * If there is a delay extent at this offset, 464 * If there is a delay extent at this offset,
465 * it will be as a data. 465 * it will be as a data.
466 */ 466 */
467 es.start = last; 467 ext4_es_find_delayed_extent(inode, last, &es);
468 (void)ext4_es_find_extent(inode, &es); 468 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
469 if (last >= es.start &&
470 last < es.start + es.len) {
471 if (last != start) 469 if (last != start)
472 dataoff = last << blkbits; 470 dataoff = last << blkbits;
473 break; 471 break;
@@ -549,11 +547,9 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
549 * If there is a delay extent at this offset, 547 * If there is a delay extent at this offset,
550 * we will skip this extent. 548 * we will skip this extent.
551 */ 549 */
552 es.start = last; 550 ext4_es_find_delayed_extent(inode, last, &es);
553 (void)ext4_es_find_extent(inode, &es); 551 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
554 if (last >= es.start && 552 last = es.es_lblk + es.es_len;
555 last < es.start + es.len) {
556 last = es.start + es.len;
557 holeoff = last << blkbits; 553 holeoff = last << blkbits;
558 continue; 554 continue;
559 } 555 }
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index fa8e4911d354..3d586f02883e 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -155,11 +155,11 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
155 /* Check to see if the seed is all zero's */ 155 /* Check to see if the seed is all zero's */
156 if (hinfo->seed) { 156 if (hinfo->seed) {
157 for (i = 0; i < 4; i++) { 157 for (i = 0; i < 4; i++) {
158 if (hinfo->seed[i]) 158 if (hinfo->seed[i]) {
159 memcpy(buf, hinfo->seed, sizeof(buf));
159 break; 160 break;
161 }
160 } 162 }
161 if (i < 4)
162 memcpy(buf, hinfo->seed, sizeof(buf));
163 } 163 }
164 164
165 switch (hinfo->hash_version) { 165 switch (hinfo->hash_version) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3f32c8012447..32fd2b9075dd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -634,8 +634,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
634 * For other inodes, search forward from the parent directory's block 634 * For other inodes, search forward from the parent directory's block
635 * group to find a free inode. 635 * group to find a free inode.
636 */ 636 */
637struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, 637struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
638 const struct qstr *qstr, __u32 goal, uid_t *owner) 638 umode_t mode, const struct qstr *qstr,
639 __u32 goal, uid_t *owner, int handle_type,
640 unsigned int line_no, int nblocks)
639{ 641{
640 struct super_block *sb; 642 struct super_block *sb;
641 struct buffer_head *inode_bitmap_bh = NULL; 643 struct buffer_head *inode_bitmap_bh = NULL;
@@ -725,6 +727,15 @@ repeat_in_this_group:
725 "inode=%lu", ino + 1); 727 "inode=%lu", ino + 1);
726 continue; 728 continue;
727 } 729 }
730 if (!handle) {
731 BUG_ON(nblocks <= 0);
732 handle = __ext4_journal_start_sb(dir->i_sb, line_no,
733 handle_type, nblocks);
734 if (IS_ERR(handle)) {
735 err = PTR_ERR(handle);
736 goto fail;
737 }
738 }
728 BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 739 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
729 err = ext4_journal_get_write_access(handle, inode_bitmap_bh); 740 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
730 if (err) 741 if (err)
@@ -1017,17 +1028,17 @@ iget_failed:
1017 inode = NULL; 1028 inode = NULL;
1018bad_orphan: 1029bad_orphan:
1019 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); 1030 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
1020 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 1031 printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1021 bit, (unsigned long long)bitmap_bh->b_blocknr, 1032 bit, (unsigned long long)bitmap_bh->b_blocknr,
1022 ext4_test_bit(bit, bitmap_bh->b_data)); 1033 ext4_test_bit(bit, bitmap_bh->b_data));
1023 printk(KERN_NOTICE "inode=%p\n", inode); 1034 printk(KERN_WARNING "inode=%p\n", inode);
1024 if (inode) { 1035 if (inode) {
1025 printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", 1036 printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
1026 is_bad_inode(inode)); 1037 is_bad_inode(inode));
1027 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 1038 printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
1028 NEXT_ORPHAN(inode)); 1039 NEXT_ORPHAN(inode));
1029 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 1040 printk(KERN_WARNING "max_ino=%lu\n", max_ino);
1030 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); 1041 printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
1031 /* Avoid freeing blocks if we got a bad deleted inode */ 1042 /* Avoid freeing blocks if we got a bad deleted inode */
1032 if (inode->i_nlink == 0) 1043 if (inode->i_nlink == 0)
1033 inode->i_blocks = 0; 1044 inode->i_blocks = 0;
@@ -1137,7 +1148,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1137 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) 1148 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1138 goto out; 1149 goto out;
1139 1150
1140 handle = ext4_journal_start_sb(sb, 1); 1151 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
1141 if (IS_ERR(handle)) { 1152 if (IS_ERR(handle)) {
1142 ret = PTR_ERR(handle); 1153 ret = PTR_ERR(handle);
1143 goto out; 1154 goto out;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 20862f96e8ae..c541ab8b64dd 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -146,6 +146,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
146 struct super_block *sb = inode->i_sb; 146 struct super_block *sb = inode->i_sb;
147 Indirect *p = chain; 147 Indirect *p = chain;
148 struct buffer_head *bh; 148 struct buffer_head *bh;
149 int ret = -EIO;
149 150
150 *err = 0; 151 *err = 0;
151 /* i_data is not going away, no lock needed */ 152 /* i_data is not going away, no lock needed */
@@ -154,8 +155,10 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
154 goto no_block; 155 goto no_block;
155 while (--depth) { 156 while (--depth) {
156 bh = sb_getblk(sb, le32_to_cpu(p->key)); 157 bh = sb_getblk(sb, le32_to_cpu(p->key));
157 if (unlikely(!bh)) 158 if (unlikely(!bh)) {
159 ret = -ENOMEM;
158 goto failure; 160 goto failure;
161 }
159 162
160 if (!bh_uptodate_or_lock(bh)) { 163 if (!bh_uptodate_or_lock(bh)) {
161 if (bh_submit_read(bh) < 0) { 164 if (bh_submit_read(bh) < 0) {
@@ -177,7 +180,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
177 return NULL; 180 return NULL;
178 181
179failure: 182failure:
180 *err = -EIO; 183 *err = ret;
181no_block: 184no_block:
182 return p; 185 return p;
183} 186}
@@ -355,9 +358,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
355 * for the first direct block 358 * for the first direct block
356 */ 359 */
357 new_blocks[index] = current_block; 360 new_blocks[index] = current_block;
358 printk(KERN_INFO "%s returned more blocks than " 361 WARN(1, KERN_INFO "%s returned more blocks than "
359 "requested\n", __func__); 362 "requested\n", __func__);
360 WARN_ON(1);
361 break; 363 break;
362 } 364 }
363 } 365 }
@@ -471,7 +473,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
471 */ 473 */
472 bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 474 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
473 if (unlikely(!bh)) { 475 if (unlikely(!bh)) {
474 err = -EIO; 476 err = -ENOMEM;
475 goto failed; 477 goto failed;
476 } 478 }
477 479
@@ -789,7 +791,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
789 791
790 if (final_size > inode->i_size) { 792 if (final_size > inode->i_size) {
791 /* Credits for sb + inode write */ 793 /* Credits for sb + inode write */
792 handle = ext4_journal_start(inode, 2); 794 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
793 if (IS_ERR(handle)) { 795 if (IS_ERR(handle)) {
794 ret = PTR_ERR(handle); 796 ret = PTR_ERR(handle);
795 goto out; 797 goto out;
@@ -849,7 +851,7 @@ locked:
849 int err; 851 int err;
850 852
851 /* Credits for sb + inode write */ 853 /* Credits for sb + inode write */
852 handle = ext4_journal_start(inode, 2); 854 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
853 if (IS_ERR(handle)) { 855 if (IS_ERR(handle)) {
854 /* This is really bad luck. We've written the data 856 /* This is really bad luck. We've written the data
855 * but cannot extend i_size. Bail out and pretend 857 * but cannot extend i_size. Bail out and pretend
@@ -948,7 +950,8 @@ static handle_t *start_transaction(struct inode *inode)
948{ 950{
949 handle_t *result; 951 handle_t *result;
950 952
951 result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); 953 result = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
954 ext4_blocks_for_truncate(inode));
952 if (!IS_ERR(result)) 955 if (!IS_ERR(result))
953 return result; 956 return result;
954 957
@@ -1515,3 +1518,243 @@ out_stop:
1515 trace_ext4_truncate_exit(inode); 1518 trace_ext4_truncate_exit(inode);
1516} 1519}
1517 1520
1521static int free_hole_blocks(handle_t *handle, struct inode *inode,
1522 struct buffer_head *parent_bh, __le32 *i_data,
1523 int level, ext4_lblk_t first,
1524 ext4_lblk_t count, int max)
1525{
1526 struct buffer_head *bh = NULL;
1527 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1528 int ret = 0;
1529 int i, inc;
1530 ext4_lblk_t offset;
1531 __le32 blk;
1532
1533 inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
1534 for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
1535 if (offset >= count + first)
1536 break;
1537 if (*i_data == 0 || (offset + inc) <= first)
1538 continue;
1539 blk = *i_data;
1540 if (level > 0) {
1541 ext4_lblk_t first2;
1542 bh = sb_bread(inode->i_sb, blk);
1543 if (!bh) {
1544 EXT4_ERROR_INODE_BLOCK(inode, blk,
1545 "Read failure");
1546 return -EIO;
1547 }
1548 first2 = (first > offset) ? first - offset : 0;
1549 ret = free_hole_blocks(handle, inode, bh,
1550 (__le32 *)bh->b_data, level - 1,
1551 first2, count - offset,
1552 inode->i_sb->s_blocksize >> 2);
1553 if (ret) {
1554 brelse(bh);
1555 goto err;
1556 }
1557 }
1558 if (level == 0 ||
1559 (bh && all_zeroes((__le32 *)bh->b_data,
1560 (__le32 *)bh->b_data + addr_per_block))) {
1561 ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
1562 *i_data = 0;
1563 }
1564 brelse(bh);
1565 bh = NULL;
1566 }
1567
1568err:
1569 return ret;
1570}
1571
1572static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
1573 ext4_lblk_t first, ext4_lblk_t stop)
1574{
1575 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1576 int level, ret = 0;
1577 int num = EXT4_NDIR_BLOCKS;
1578 ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
1579 __le32 *i_data = EXT4_I(inode)->i_data;
1580
1581 count = stop - first;
1582 for (level = 0; level < 4; level++, max *= addr_per_block) {
1583 if (first < max) {
1584 ret = free_hole_blocks(handle, inode, NULL, i_data,
1585 level, first, count, num);
1586 if (ret)
1587 goto err;
1588 if (count > max - first)
1589 count -= max - first;
1590 else
1591 break;
1592 first = 0;
1593 } else {
1594 first -= max;
1595 }
1596 i_data += num;
1597 if (level == 0) {
1598 num = 1;
1599 max = 1;
1600 }
1601 }
1602
1603err:
1604 return ret;
1605}
1606
1607int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
1608{
1609 struct inode *inode = file->f_path.dentry->d_inode;
1610 struct super_block *sb = inode->i_sb;
1611 ext4_lblk_t first_block, stop_block;
1612 struct address_space *mapping = inode->i_mapping;
1613 handle_t *handle = NULL;
1614 loff_t first_page, last_page, page_len;
1615 loff_t first_page_offset, last_page_offset;
1616 int err = 0;
1617
1618 /*
1619 * Write out all dirty pages to avoid race conditions
1620 * Then release them.
1621 */
1622 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
1623 err = filemap_write_and_wait_range(mapping,
1624 offset, offset + length - 1);
1625 if (err)
1626 return err;
1627 }
1628
1629 mutex_lock(&inode->i_mutex);
1630 /* It's not possible punch hole on append only file */
1631 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
1632 err = -EPERM;
1633 goto out_mutex;
1634 }
1635 if (IS_SWAPFILE(inode)) {
1636 err = -ETXTBSY;
1637 goto out_mutex;
1638 }
1639
1640 /* No need to punch hole beyond i_size */
1641 if (offset >= inode->i_size)
1642 goto out_mutex;
1643
1644 /*
1645 * If the hole extents beyond i_size, set the hole
1646 * to end after the page that contains i_size
1647 */
1648 if (offset + length > inode->i_size) {
1649 length = inode->i_size +
1650 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
1651 offset;
1652 }
1653
1654 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1655 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
1656
1657 first_page_offset = first_page << PAGE_CACHE_SHIFT;
1658 last_page_offset = last_page << PAGE_CACHE_SHIFT;
1659
1660 /* Now release the pages */
1661 if (last_page_offset > first_page_offset) {
1662 truncate_pagecache_range(inode, first_page_offset,
1663 last_page_offset - 1);
1664 }
1665
1666 /* Wait all existing dio works, newcomers will block on i_mutex */
1667 inode_dio_wait(inode);
1668
1669 handle = start_transaction(inode);
1670 if (IS_ERR(handle))
1671 goto out_mutex;
1672
1673 /*
1674 * Now we need to zero out the non-page-aligned data in the
1675 * pages at the start and tail of the hole, and unmap the buffer
1676 * heads for the block aligned regions of the page that were
1677 * completely zerod.
1678 */
1679 if (first_page > last_page) {
1680 /*
1681 * If the file space being truncated is contained within a page
1682 * just zero out and unmap the middle of that page
1683 */
1684 err = ext4_discard_partial_page_buffers(handle,
1685 mapping, offset, length, 0);
1686 if (err)
1687 goto out;
1688 } else {
1689 /*
1690 * Zero out and unmap the paritial page that contains
1691 * the start of the hole
1692 */
1693 page_len = first_page_offset - offset;
1694 if (page_len > 0) {
1695 err = ext4_discard_partial_page_buffers(handle, mapping,
1696 offset, page_len, 0);
1697 if (err)
1698 goto out;
1699 }
1700
1701 /*
1702 * Zero out and unmap the partial page that contains
1703 * the end of the hole
1704 */
1705 page_len = offset + length - last_page_offset;
1706 if (page_len > 0) {
1707 err = ext4_discard_partial_page_buffers(handle, mapping,
1708 last_page_offset, page_len, 0);
1709 if (err)
1710 goto out;
1711 }
1712 }
1713
1714 /*
1715 * If i_size contained in the last page, we need to
1716 * unmap and zero the paritial page after i_size
1717 */
1718 if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
1719 inode->i_size % PAGE_CACHE_SIZE != 0) {
1720 page_len = PAGE_CACHE_SIZE -
1721 (inode->i_size & (PAGE_CACHE_SIZE - 1));
1722 if (page_len > 0) {
1723 err = ext4_discard_partial_page_buffers(handle,
1724 mapping, inode->i_size, page_len, 0);
1725 if (err)
1726 goto out;
1727 }
1728 }
1729
1730 first_block = (offset + sb->s_blocksize - 1) >>
1731 EXT4_BLOCK_SIZE_BITS(sb);
1732 stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
1733
1734 if (first_block >= stop_block)
1735 goto out;
1736
1737 down_write(&EXT4_I(inode)->i_data_sem);
1738 ext4_discard_preallocations(inode);
1739
1740 err = ext4_es_remove_extent(inode, first_block,
1741 stop_block - first_block);
1742 err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
1743
1744 ext4_discard_preallocations(inode);
1745
1746 if (IS_SYNC(inode))
1747 ext4_handle_sync(handle);
1748
1749 up_write(&EXT4_I(inode)->i_data_sem);
1750
1751out:
1752 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
1753 ext4_mark_inode_dirty(handle, inode);
1754 ext4_journal_stop(handle);
1755
1756out_mutex:
1757 mutex_unlock(&inode->i_mutex);
1758
1759 return err;
1760}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 387c47c6cda9..c0fd1a123f7d 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -545,7 +545,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
545 return ret; 545 return ret;
546 546
547retry: 547retry:
548 handle = ext4_journal_start(inode, needed_blocks); 548 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
549 if (IS_ERR(handle)) { 549 if (IS_ERR(handle)) {
550 ret = PTR_ERR(handle); 550 ret = PTR_ERR(handle);
551 handle = NULL; 551 handle = NULL;
@@ -657,7 +657,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
657 * The possible write could happen in the inode, 657 * The possible write could happen in the inode,
658 * so try to reserve the space in inode first. 658 * so try to reserve the space in inode first.
659 */ 659 */
660 handle = ext4_journal_start(inode, 1); 660 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
661 if (IS_ERR(handle)) { 661 if (IS_ERR(handle)) {
662 ret = PTR_ERR(handle); 662 ret = PTR_ERR(handle);
663 handle = NULL; 663 handle = NULL;
@@ -853,7 +853,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
853 if (ret) 853 if (ret)
854 return ret; 854 return ret;
855 855
856 handle = ext4_journal_start(inode, 1); 856 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
857 if (IS_ERR(handle)) { 857 if (IS_ERR(handle)) {
858 ret = PTR_ERR(handle); 858 ret = PTR_ERR(handle);
859 handle = NULL; 859 handle = NULL;
@@ -1188,7 +1188,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
1188 1188
1189 data_bh = sb_getblk(inode->i_sb, map.m_pblk); 1189 data_bh = sb_getblk(inode->i_sb, map.m_pblk);
1190 if (!data_bh) { 1190 if (!data_bh) {
1191 error = -EIO; 1191 error = -ENOMEM;
1192 goto out_restore; 1192 goto out_restore;
1193 } 1193 }
1194 1194
@@ -1298,7 +1298,7 @@ int ext4_read_inline_dir(struct file *filp,
1298 int i, stored; 1298 int i, stored;
1299 struct ext4_dir_entry_2 *de; 1299 struct ext4_dir_entry_2 *de;
1300 struct super_block *sb; 1300 struct super_block *sb;
1301 struct inode *inode = filp->f_path.dentry->d_inode; 1301 struct inode *inode = file_inode(filp);
1302 int ret, inline_size = 0; 1302 int ret, inline_size = 0;
1303 struct ext4_iloc iloc; 1303 struct ext4_iloc iloc;
1304 void *dir_buf = NULL; 1304 void *dir_buf = NULL;
@@ -1770,7 +1770,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
1770 1770
1771 1771
1772 needed_blocks = ext4_writepage_trans_blocks(inode); 1772 needed_blocks = ext4_writepage_trans_blocks(inode);
1773 handle = ext4_journal_start(inode, needed_blocks); 1773 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
1774 if (IS_ERR(handle)) 1774 if (IS_ERR(handle))
1775 return; 1775 return;
1776 1776
@@ -1862,7 +1862,7 @@ int ext4_convert_inline_data(struct inode *inode)
1862 if (error) 1862 if (error)
1863 return error; 1863 return error;
1864 1864
1865 handle = ext4_journal_start(inode, needed_blocks); 1865 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
1866 if (IS_ERR(handle)) { 1866 if (IS_ERR(handle)) {
1867 error = PTR_ERR(handle); 1867 error = PTR_ERR(handle);
1868 goto out_free; 1868 goto out_free;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..9ea0cde3fa9e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -132,10 +132,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
132} 132}
133 133
134static void ext4_invalidatepage(struct page *page, unsigned long offset); 134static void ext4_invalidatepage(struct page *page, unsigned long offset);
135static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
136 struct buffer_head *bh_result, int create);
137static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
138static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
139static int __ext4_journalled_writepage(struct page *page, unsigned int len); 135static int __ext4_journalled_writepage(struct page *page, unsigned int len);
140static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 136static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
141static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 137static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
@@ -238,7 +234,8 @@ void ext4_evict_inode(struct inode *inode)
238 * protection against it 234 * protection against it
239 */ 235 */
240 sb_start_intwrite(inode->i_sb); 236 sb_start_intwrite(inode->i_sb);
241 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); 237 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
238 ext4_blocks_for_truncate(inode)+3);
242 if (IS_ERR(handle)) { 239 if (IS_ERR(handle)) {
243 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 240 ext4_std_error(inode->i_sb, PTR_ERR(handle));
244 /* 241 /*
@@ -346,7 +343,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
346 spin_lock(&ei->i_block_reservation_lock); 343 spin_lock(&ei->i_block_reservation_lock);
347 trace_ext4_da_update_reserve_space(inode, used, quota_claim); 344 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
348 if (unlikely(used > ei->i_reserved_data_blocks)) { 345 if (unlikely(used > ei->i_reserved_data_blocks)) {
349 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 346 ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
350 "with only %d reserved data blocks", 347 "with only %d reserved data blocks",
351 __func__, inode->i_ino, used, 348 __func__, inode->i_ino, used,
352 ei->i_reserved_data_blocks); 349 ei->i_reserved_data_blocks);
@@ -355,10 +352,12 @@ void ext4_da_update_reserve_space(struct inode *inode,
355 } 352 }
356 353
357 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { 354 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
358 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " 355 ext4_warning(inode->i_sb, "ino %lu, allocated %d "
359 "with only %d reserved metadata blocks\n", __func__, 356 "with only %d reserved metadata blocks "
360 inode->i_ino, ei->i_allocated_meta_blocks, 357 "(releasing %d blocks with reserved %d data blocks)",
361 ei->i_reserved_meta_blocks); 358 inode->i_ino, ei->i_allocated_meta_blocks,
359 ei->i_reserved_meta_blocks, used,
360 ei->i_reserved_data_blocks);
362 WARN_ON(1); 361 WARN_ON(1);
363 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; 362 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
364 } 363 }
@@ -508,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
508int ext4_map_blocks(handle_t *handle, struct inode *inode, 507int ext4_map_blocks(handle_t *handle, struct inode *inode,
509 struct ext4_map_blocks *map, int flags) 508 struct ext4_map_blocks *map, int flags)
510{ 509{
510 struct extent_status es;
511 int retval; 511 int retval;
512 512
513 map->m_flags = 0; 513 map->m_flags = 0;
514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
515 "logical block %lu\n", inode->i_ino, flags, map->m_len, 515 "logical block %lu\n", inode->i_ino, flags, map->m_len,
516 (unsigned long) map->m_lblk); 516 (unsigned long) map->m_lblk);
517
518 /* Lookup extent status tree firstly */
519 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
520 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
521 map->m_pblk = ext4_es_pblock(&es) +
522 map->m_lblk - es.es_lblk;
523 map->m_flags |= ext4_es_is_written(&es) ?
524 EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
525 retval = es.es_len - (map->m_lblk - es.es_lblk);
526 if (retval > map->m_len)
527 retval = map->m_len;
528 map->m_len = retval;
529 } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
530 retval = 0;
531 } else {
532 BUG_ON(1);
533 }
534 goto found;
535 }
536
517 /* 537 /*
518 * Try to see if we can get the block without requesting a new 538 * Try to see if we can get the block without requesting a new
519 * file system block. 539 * file system block.
@@ -527,20 +547,27 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
527 retval = ext4_ind_map_blocks(handle, inode, map, flags & 547 retval = ext4_ind_map_blocks(handle, inode, map, flags &
528 EXT4_GET_BLOCKS_KEEP_SIZE); 548 EXT4_GET_BLOCKS_KEEP_SIZE);
529 } 549 }
550 if (retval > 0) {
551 int ret;
552 unsigned long long status;
553
554 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
555 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
556 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
557 ext4_find_delalloc_range(inode, map->m_lblk,
558 map->m_lblk + map->m_len - 1))
559 status |= EXTENT_STATUS_DELAYED;
560 ret = ext4_es_insert_extent(inode, map->m_lblk,
561 map->m_len, map->m_pblk, status);
562 if (ret < 0)
563 retval = ret;
564 }
530 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 565 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
531 up_read((&EXT4_I(inode)->i_data_sem)); 566 up_read((&EXT4_I(inode)->i_data_sem));
532 567
568found:
533 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 569 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
534 int ret; 570 int ret = check_block_validity(inode, map);
535 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
536 /* delayed alloc may be allocated by fallocate and
537 * coverted to initialized by directIO.
538 * we need to handle delayed extent here.
539 */
540 down_write((&EXT4_I(inode)->i_data_sem));
541 goto delayed_mapped;
542 }
543 ret = check_block_validity(inode, map);
544 if (ret != 0) 571 if (ret != 0)
545 return ret; 572 return ret;
546 } 573 }
@@ -560,16 +587,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
560 return retval; 587 return retval;
561 588
562 /* 589 /*
563 * When we call get_blocks without the create flag, the 590 * Here we clear m_flags because after allocating an new extent,
564 * BH_Unwritten flag could have gotten set if the blocks 591 * it will be set again.
565 * requested were part of a uninitialized extent. We need to
566 * clear this flag now that we are committed to convert all or
567 * part of the uninitialized extent to be an initialized
568 * extent. This is because we need to avoid the combination
569 * of BH_Unwritten and BH_Mapped flags being simultaneously
570 * set on the buffer_head.
571 */ 592 */
572 map->m_flags &= ~EXT4_MAP_UNWRITTEN; 593 map->m_flags &= ~EXT4_MAP_FLAGS;
573 594
574 /* 595 /*
575 * New blocks allocate and/or writing to uninitialized extent 596 * New blocks allocate and/or writing to uninitialized extent
@@ -615,18 +636,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
615 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 636 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
616 ext4_da_update_reserve_space(inode, retval, 1); 637 ext4_da_update_reserve_space(inode, retval, 1);
617 } 638 }
618 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { 639 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
619 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 640 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
620 641
621 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 642 if (retval > 0) {
622 int ret; 643 int ret;
623delayed_mapped: 644 unsigned long long status;
624 /* delayed allocation blocks has been allocated */ 645
625 ret = ext4_es_remove_extent(inode, map->m_lblk, 646 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
626 map->m_len); 647 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
627 if (ret < 0) 648 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
628 retval = ret; 649 ext4_find_delalloc_range(inode, map->m_lblk,
629 } 650 map->m_lblk + map->m_len - 1))
651 status |= EXTENT_STATUS_DELAYED;
652 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
653 map->m_pblk, status);
654 if (ret < 0)
655 retval = ret;
630 } 656 }
631 657
632 up_write((&EXT4_I(inode)->i_data_sem)); 658 up_write((&EXT4_I(inode)->i_data_sem));
@@ -660,7 +686,8 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
660 if (map.m_len > DIO_MAX_BLOCKS) 686 if (map.m_len > DIO_MAX_BLOCKS)
661 map.m_len = DIO_MAX_BLOCKS; 687 map.m_len = DIO_MAX_BLOCKS;
662 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); 688 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
663 handle = ext4_journal_start(inode, dio_credits); 689 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
690 dio_credits);
664 if (IS_ERR(handle)) { 691 if (IS_ERR(handle)) {
665 ret = PTR_ERR(handle); 692 ret = PTR_ERR(handle);
666 return ret; 693 return ret;
@@ -707,14 +734,16 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
707 /* ensure we send some value back into *errp */ 734 /* ensure we send some value back into *errp */
708 *errp = 0; 735 *errp = 0;
709 736
737 if (create && err == 0)
738 err = -ENOSPC; /* should never happen */
710 if (err < 0) 739 if (err < 0)
711 *errp = err; 740 *errp = err;
712 if (err <= 0) 741 if (err <= 0)
713 return NULL; 742 return NULL;
714 743
715 bh = sb_getblk(inode->i_sb, map.m_pblk); 744 bh = sb_getblk(inode->i_sb, map.m_pblk);
716 if (!bh) { 745 if (unlikely(!bh)) {
717 *errp = -EIO; 746 *errp = -ENOMEM;
718 return NULL; 747 return NULL;
719 } 748 }
720 if (map.m_flags & EXT4_MAP_NEW) { 749 if (map.m_flags & EXT4_MAP_NEW) {
@@ -808,11 +837,10 @@ int ext4_walk_page_buffers(handle_t *handle,
808 * and the commit_write(). So doing the jbd2_journal_start at the start of 837 * and the commit_write(). So doing the jbd2_journal_start at the start of
809 * prepare_write() is the right place. 838 * prepare_write() is the right place.
810 * 839 *
811 * Also, this function can nest inside ext4_writepage() -> 840 * Also, this function can nest inside ext4_writepage(). In that case, we
812 * block_write_full_page(). In that case, we *know* that ext4_writepage() 841 * *know* that ext4_writepage() has generated enough buffer credits to do the
813 * has generated enough buffer credits to do the whole page. So we won't 842 * whole page. So we won't block on the journal in that case, which is good,
814 * block on the journal in that case, which is good, because the caller may 843 * because the caller may be PF_MEMALLOC.
815 * be PF_MEMALLOC.
816 * 844 *
817 * By accident, ext4 can be reentered when a transaction is open via 845 * By accident, ext4 can be reentered when a transaction is open via
818 * quota file writes. If we were to commit the transaction while thus 846 * quota file writes. If we were to commit the transaction while thus
@@ -878,32 +906,40 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
878 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, 906 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
879 flags, pagep); 907 flags, pagep);
880 if (ret < 0) 908 if (ret < 0)
881 goto out; 909 return ret;
882 if (ret == 1) { 910 if (ret == 1)
883 ret = 0; 911 return 0;
884 goto out;
885 }
886 } 912 }
887 913
888retry: 914 /*
889 handle = ext4_journal_start(inode, needed_blocks); 915 * grab_cache_page_write_begin() can take a long time if the
916 * system is thrashing due to memory pressure, or if the page
917 * is being written back. So grab it first before we start
918 * the transaction handle. This also allows us to allocate
919 * the page (if needed) without using GFP_NOFS.
920 */
921retry_grab:
922 page = grab_cache_page_write_begin(mapping, index, flags);
923 if (!page)
924 return -ENOMEM;
925 unlock_page(page);
926
927retry_journal:
928 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
890 if (IS_ERR(handle)) { 929 if (IS_ERR(handle)) {
891 ret = PTR_ERR(handle); 930 page_cache_release(page);
892 goto out; 931 return PTR_ERR(handle);
893 } 932 }
894 933
895 /* We cannot recurse into the filesystem as the transaction is already 934 lock_page(page);
896 * started */ 935 if (page->mapping != mapping) {
897 flags |= AOP_FLAG_NOFS; 936 /* The page got truncated from under us */
898 937 unlock_page(page);
899 page = grab_cache_page_write_begin(mapping, index, flags); 938 page_cache_release(page);
900 if (!page) {
901 ext4_journal_stop(handle); 939 ext4_journal_stop(handle);
902 ret = -ENOMEM; 940 goto retry_grab;
903 goto out;
904 } 941 }
905 942 wait_on_page_writeback(page);
906 *pagep = page;
907 943
908 if (ext4_should_dioread_nolock(inode)) 944 if (ext4_should_dioread_nolock(inode))
909 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 945 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -918,7 +954,6 @@ retry:
918 954
919 if (ret) { 955 if (ret) {
920 unlock_page(page); 956 unlock_page(page);
921 page_cache_release(page);
922 /* 957 /*
923 * __block_write_begin may have instantiated a few blocks 958 * __block_write_begin may have instantiated a few blocks
924 * outside i_size. Trim these off again. Don't need 959 * outside i_size. Trim these off again. Don't need
@@ -942,11 +977,14 @@ retry:
942 if (inode->i_nlink) 977 if (inode->i_nlink)
943 ext4_orphan_del(NULL, inode); 978 ext4_orphan_del(NULL, inode);
944 } 979 }
945 }
946 980
947 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 981 if (ret == -ENOSPC &&
948 goto retry; 982 ext4_should_retry_alloc(inode->i_sb, &retries))
949out: 983 goto retry_journal;
984 page_cache_release(page);
985 return ret;
986 }
987 *pagep = page;
950 return ret; 988 return ret;
951} 989}
952 990
@@ -1256,7 +1294,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1256 * function is called from invalidate page, it's 1294 * function is called from invalidate page, it's
1257 * harmless to return without any action. 1295 * harmless to return without any action.
1258 */ 1296 */
1259 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " 1297 ext4_warning(inode->i_sb, "ext4_da_release_space: "
1260 "ino %lu, to_free %d with only %d reserved " 1298 "ino %lu, to_free %d with only %d reserved "
1261 "data blocks", inode->i_ino, to_free, 1299 "data blocks", inode->i_ino, to_free,
1262 ei->i_reserved_data_blocks); 1300 ei->i_reserved_data_blocks);
@@ -1357,7 +1395,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1357 loff_t size = i_size_read(inode); 1395 loff_t size = i_size_read(inode);
1358 unsigned int len, block_start; 1396 unsigned int len, block_start;
1359 struct buffer_head *bh, *page_bufs = NULL; 1397 struct buffer_head *bh, *page_bufs = NULL;
1360 int journal_data = ext4_should_journal_data(inode);
1361 sector_t pblock = 0, cur_logical = 0; 1398 sector_t pblock = 0, cur_logical = 0;
1362 struct ext4_io_submit io_submit; 1399 struct ext4_io_submit io_submit;
1363 1400
@@ -1378,7 +1415,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1378 if (nr_pages == 0) 1415 if (nr_pages == 0)
1379 break; 1416 break;
1380 for (i = 0; i < nr_pages; i++) { 1417 for (i = 0; i < nr_pages; i++) {
1381 int commit_write = 0, skip_page = 0; 1418 int skip_page = 0;
1382 struct page *page = pvec.pages[i]; 1419 struct page *page = pvec.pages[i];
1383 1420
1384 index = page->index; 1421 index = page->index;
@@ -1400,27 +1437,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1400 BUG_ON(!PageLocked(page)); 1437 BUG_ON(!PageLocked(page));
1401 BUG_ON(PageWriteback(page)); 1438 BUG_ON(PageWriteback(page));
1402 1439
1403 /*
1404 * If the page does not have buffers (for
1405 * whatever reason), try to create them using
1406 * __block_write_begin. If this fails,
1407 * skip the page and move on.
1408 */
1409 if (!page_has_buffers(page)) {
1410 if (__block_write_begin(page, 0, len,
1411 noalloc_get_block_write)) {
1412 skip_page:
1413 unlock_page(page);
1414 continue;
1415 }
1416 commit_write = 1;
1417 }
1418
1419 bh = page_bufs = page_buffers(page); 1440 bh = page_bufs = page_buffers(page);
1420 block_start = 0; 1441 block_start = 0;
1421 do { 1442 do {
1422 if (!bh)
1423 goto skip_page;
1424 if (map && (cur_logical >= map->m_lblk) && 1443 if (map && (cur_logical >= map->m_lblk) &&
1425 (cur_logical <= (map->m_lblk + 1444 (cur_logical <= (map->m_lblk +
1426 (map->m_len - 1)))) { 1445 (map->m_len - 1)))) {
@@ -1448,33 +1467,14 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1448 pblock++; 1467 pblock++;
1449 } while (bh != page_bufs); 1468 } while (bh != page_bufs);
1450 1469
1451 if (skip_page) 1470 if (skip_page) {
1452 goto skip_page; 1471 unlock_page(page);
1453 1472 continue;
1454 if (commit_write) 1473 }
1455 /* mark the buffer_heads as dirty & uptodate */
1456 block_commit_write(page, 0, len);
1457 1474
1458 clear_page_dirty_for_io(page); 1475 clear_page_dirty_for_io(page);
1459 /* 1476 err = ext4_bio_write_page(&io_submit, page, len,
1460 * Delalloc doesn't support data journalling, 1477 mpd->wbc);
1461 * but eventually maybe we'll lift this
1462 * restriction.
1463 */
1464 if (unlikely(journal_data && PageChecked(page)))
1465 err = __ext4_journalled_writepage(page, len);
1466 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
1467 err = ext4_bio_write_page(&io_submit, page,
1468 len, mpd->wbc);
1469 else if (buffer_uninit(page_bufs)) {
1470 ext4_set_bh_endio(page_bufs, inode);
1471 err = block_write_full_page_endio(page,
1472 noalloc_get_block_write,
1473 mpd->wbc, ext4_end_io_buffer_write);
1474 } else
1475 err = block_write_full_page(page,
1476 noalloc_get_block_write, mpd->wbc);
1477
1478 if (!err) 1478 if (!err)
1479 mpd->pages_written++; 1479 mpd->pages_written++;
1480 /* 1480 /*
@@ -1640,7 +1640,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1640 (unsigned long long) next, 1640 (unsigned long long) next,
1641 mpd->b_size >> mpd->inode->i_blkbits, err); 1641 mpd->b_size >> mpd->inode->i_blkbits, err);
1642 ext4_msg(sb, KERN_CRIT, 1642 ext4_msg(sb, KERN_CRIT,
1643 "This should not happen!! Data will be lost\n"); 1643 "This should not happen!! Data will be lost");
1644 if (err == -ENOSPC) 1644 if (err == -ENOSPC)
1645 ext4_print_free_blocks(mpd->inode); 1645 ext4_print_free_blocks(mpd->inode);
1646 } 1646 }
@@ -1690,16 +1690,16 @@ submit_io:
1690 * 1690 *
1691 * @mpd->lbh - extent of blocks 1691 * @mpd->lbh - extent of blocks
1692 * @logical - logical number of the block in the file 1692 * @logical - logical number of the block in the file
1693 * @bh - bh of the block (used to access block's state) 1693 * @b_state - b_state of the buffer head added
1694 * 1694 *
1695 * the function is used to collect contig. blocks in same state 1695 * the function is used to collect contig. blocks in same state
1696 */ 1696 */
1697static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 1697static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
1698 sector_t logical, size_t b_size,
1699 unsigned long b_state) 1698 unsigned long b_state)
1700{ 1699{
1701 sector_t next; 1700 sector_t next;
1702 int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; 1701 int blkbits = mpd->inode->i_blkbits;
1702 int nrblocks = mpd->b_size >> blkbits;
1703 1703
1704 /* 1704 /*
1705 * XXX Don't go larger than mballoc is willing to allocate 1705 * XXX Don't go larger than mballoc is willing to allocate
@@ -1707,11 +1707,11 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1707 * mpage_da_submit_io() into this function and then call 1707 * mpage_da_submit_io() into this function and then call
1708 * ext4_map_blocks() multiple times in a loop 1708 * ext4_map_blocks() multiple times in a loop
1709 */ 1709 */
1710 if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) 1710 if (nrblocks >= (8*1024*1024 >> blkbits))
1711 goto flush_it; 1711 goto flush_it;
1712 1712
1713 /* check if thereserved journal credits might overflow */ 1713 /* check if the reserved journal credits might overflow */
1714 if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { 1714 if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
1715 if (nrblocks >= EXT4_MAX_TRANS_DATA) { 1715 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1716 /* 1716 /*
1717 * With non-extent format we are limited by the journal 1717 * With non-extent format we are limited by the journal
@@ -1720,16 +1720,6 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1720 * nrblocks. So limit nrblocks. 1720 * nrblocks. So limit nrblocks.
1721 */ 1721 */
1722 goto flush_it; 1722 goto flush_it;
1723 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1724 EXT4_MAX_TRANS_DATA) {
1725 /*
1726 * Adding the new buffer_head would make it cross the
1727 * allowed limit for which we have journal credit
1728 * reserved. So limit the new bh->b_size
1729 */
1730 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1731 mpd->inode->i_blkbits;
1732 /* we will do mpage_da_submit_io in the next loop */
1733 } 1723 }
1734 } 1724 }
1735 /* 1725 /*
@@ -1737,7 +1727,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1737 */ 1727 */
1738 if (mpd->b_size == 0) { 1728 if (mpd->b_size == 0) {
1739 mpd->b_blocknr = logical; 1729 mpd->b_blocknr = logical;
1740 mpd->b_size = b_size; 1730 mpd->b_size = 1 << blkbits;
1741 mpd->b_state = b_state & BH_FLAGS; 1731 mpd->b_state = b_state & BH_FLAGS;
1742 return; 1732 return;
1743 } 1733 }
@@ -1747,7 +1737,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1747 * Can we merge the block to our big extent? 1737 * Can we merge the block to our big extent?
1748 */ 1738 */
1749 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { 1739 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
1750 mpd->b_size += b_size; 1740 mpd->b_size += 1 << blkbits;
1751 return; 1741 return;
1752 } 1742 }
1753 1743
@@ -1775,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1775 struct ext4_map_blocks *map, 1765 struct ext4_map_blocks *map,
1776 struct buffer_head *bh) 1766 struct buffer_head *bh)
1777{ 1767{
1768 struct extent_status es;
1778 int retval; 1769 int retval;
1779 sector_t invalid_block = ~((sector_t) 0xffff); 1770 sector_t invalid_block = ~((sector_t) 0xffff);
1780 1771
@@ -1785,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1785 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," 1776 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1786 "logical block %lu\n", inode->i_ino, map->m_len, 1777 "logical block %lu\n", inode->i_ino, map->m_len,
1787 (unsigned long) map->m_lblk); 1778 (unsigned long) map->m_lblk);
1779
1780 /* Lookup extent status tree firstly */
1781 if (ext4_es_lookup_extent(inode, iblock, &es)) {
1782
1783 if (ext4_es_is_hole(&es)) {
1784 retval = 0;
1785 down_read((&EXT4_I(inode)->i_data_sem));
1786 goto add_delayed;
1787 }
1788
1789 /*
1790 * Delayed extent could be allocated by fallocate.
1791 * So we need to check it.
1792 */
1793 if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
1794 map_bh(bh, inode->i_sb, invalid_block);
1795 set_buffer_new(bh);
1796 set_buffer_delay(bh);
1797 return 0;
1798 }
1799
1800 map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
1801 retval = es.es_len - (iblock - es.es_lblk);
1802 if (retval > map->m_len)
1803 retval = map->m_len;
1804 map->m_len = retval;
1805 if (ext4_es_is_written(&es))
1806 map->m_flags |= EXT4_MAP_MAPPED;
1807 else if (ext4_es_is_unwritten(&es))
1808 map->m_flags |= EXT4_MAP_UNWRITTEN;
1809 else
1810 BUG_ON(1);
1811
1812 return retval;
1813 }
1814
1788 /* 1815 /*
1789 * Try to see if we can get the block without requesting a new 1816 * Try to see if we can get the block without requesting a new
1790 * file system block. 1817 * file system block.
@@ -1803,11 +1830,15 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1803 map->m_flags |= EXT4_MAP_FROM_CLUSTER; 1830 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
1804 retval = 0; 1831 retval = 0;
1805 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 1832 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1806 retval = ext4_ext_map_blocks(NULL, inode, map, 0); 1833 retval = ext4_ext_map_blocks(NULL, inode, map,
1834 EXT4_GET_BLOCKS_NO_PUT_HOLE);
1807 else 1835 else
1808 retval = ext4_ind_map_blocks(NULL, inode, map, 0); 1836 retval = ext4_ind_map_blocks(NULL, inode, map,
1837 EXT4_GET_BLOCKS_NO_PUT_HOLE);
1809 1838
1839add_delayed:
1810 if (retval == 0) { 1840 if (retval == 0) {
1841 int ret;
1811 /* 1842 /*
1812 * XXX: __block_prepare_write() unmaps passed block, 1843 * XXX: __block_prepare_write() unmaps passed block,
1813 * is it OK? 1844 * is it OK?
@@ -1815,15 +1846,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1815 /* If the block was allocated from previously allocated cluster, 1846 /* If the block was allocated from previously allocated cluster,
1816 * then we dont need to reserve it again. */ 1847 * then we dont need to reserve it again. */
1817 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { 1848 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1818 retval = ext4_da_reserve_space(inode, iblock); 1849 ret = ext4_da_reserve_space(inode, iblock);
1819 if (retval) 1850 if (ret) {
1820 /* not enough space to reserve */ 1851 /* not enough space to reserve */
1852 retval = ret;
1821 goto out_unlock; 1853 goto out_unlock;
1854 }
1822 } 1855 }
1823 1856
1824 retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len); 1857 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1825 if (retval) 1858 ~0, EXTENT_STATUS_DELAYED);
1859 if (ret) {
1860 retval = ret;
1826 goto out_unlock; 1861 goto out_unlock;
1862 }
1827 1863
1828 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served 1864 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1829 * and it should not appear on the bh->b_state. 1865 * and it should not appear on the bh->b_state.
@@ -1833,6 +1869,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1833 map_bh(bh, inode->i_sb, invalid_block); 1869 map_bh(bh, inode->i_sb, invalid_block);
1834 set_buffer_new(bh); 1870 set_buffer_new(bh);
1835 set_buffer_delay(bh); 1871 set_buffer_delay(bh);
1872 } else if (retval > 0) {
1873 int ret;
1874 unsigned long long status;
1875
1876 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1877 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1878 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1879 map->m_pblk, status);
1880 if (ret != 0)
1881 retval = ret;
1836 } 1882 }
1837 1883
1838out_unlock: 1884out_unlock:
@@ -1890,27 +1936,6 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1890 return 0; 1936 return 0;
1891} 1937}
1892 1938
1893/*
1894 * This function is used as a standard get_block_t calback function
1895 * when there is no desire to allocate any blocks. It is used as a
1896 * callback function for block_write_begin() and block_write_full_page().
1897 * These functions should only try to map a single block at a time.
1898 *
1899 * Since this function doesn't do block allocations even if the caller
1900 * requests it by passing in create=1, it is critically important that
1901 * any caller checks to make sure that any buffer heads are returned
1902 * by this function are either all already mapped or marked for
1903 * delayed allocation before calling block_write_full_page(). Otherwise,
1904 * b_blocknr could be left unitialized, and the page write functions will
1905 * be taken by surprise.
1906 */
1907static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
1908 struct buffer_head *bh_result, int create)
1909{
1910 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
1911 return _ext4_get_block(inode, iblock, bh_result, 0);
1912}
1913
1914static int bget_one(handle_t *handle, struct buffer_head *bh) 1939static int bget_one(handle_t *handle, struct buffer_head *bh)
1915{ 1940{
1916 get_bh(bh); 1941 get_bh(bh);
@@ -1955,7 +1980,8 @@ static int __ext4_journalled_writepage(struct page *page,
1955 * references to buffers so we are safe */ 1980 * references to buffers so we are safe */
1956 unlock_page(page); 1981 unlock_page(page);
1957 1982
1958 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 1983 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
1984 ext4_writepage_trans_blocks(inode));
1959 if (IS_ERR(handle)) { 1985 if (IS_ERR(handle)) {
1960 ret = PTR_ERR(handle); 1986 ret = PTR_ERR(handle);
1961 goto out; 1987 goto out;
@@ -2035,11 +2061,12 @@ out:
2035static int ext4_writepage(struct page *page, 2061static int ext4_writepage(struct page *page,
2036 struct writeback_control *wbc) 2062 struct writeback_control *wbc)
2037{ 2063{
2038 int ret = 0, commit_write = 0; 2064 int ret = 0;
2039 loff_t size; 2065 loff_t size;
2040 unsigned int len; 2066 unsigned int len;
2041 struct buffer_head *page_bufs = NULL; 2067 struct buffer_head *page_bufs = NULL;
2042 struct inode *inode = page->mapping->host; 2068 struct inode *inode = page->mapping->host;
2069 struct ext4_io_submit io_submit;
2043 2070
2044 trace_ext4_writepage(page); 2071 trace_ext4_writepage(page);
2045 size = i_size_read(inode); 2072 size = i_size_read(inode);
@@ -2048,39 +2075,29 @@ static int ext4_writepage(struct page *page,
2048 else 2075 else
2049 len = PAGE_CACHE_SIZE; 2076 len = PAGE_CACHE_SIZE;
2050 2077
2078 page_bufs = page_buffers(page);
2051 /* 2079 /*
2052 * If the page does not have buffers (for whatever reason), 2080 * We cannot do block allocation or other extent handling in this
2053 * try to create them using __block_write_begin. If this 2081 * function. If there are buffers needing that, we have to redirty
2054 * fails, redirty the page and move on. 2082 * the page. But we may reach here when we do a journal commit via
2083 * journal_submit_inode_data_buffers() and in that case we must write
2084 * allocated buffers to achieve data=ordered mode guarantees.
2055 */ 2085 */
2056 if (!page_has_buffers(page)) { 2086 if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2057 if (__block_write_begin(page, 0, len, 2087 ext4_bh_delay_or_unwritten)) {
2058 noalloc_get_block_write)) { 2088 redirty_page_for_writepage(wbc, page);
2059 redirty_page: 2089 if (current->flags & PF_MEMALLOC) {
2060 redirty_page_for_writepage(wbc, page); 2090 /*
2091 * For memory cleaning there's no point in writing only
2092 * some buffers. So just bail out. Warn if we came here
2093 * from direct reclaim.
2094 */
2095 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD))
2096 == PF_MEMALLOC);
2061 unlock_page(page); 2097 unlock_page(page);
2062 return 0; 2098 return 0;
2063 } 2099 }
2064 commit_write = 1;
2065 }
2066 page_bufs = page_buffers(page);
2067 if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2068 ext4_bh_delay_or_unwritten)) {
2069 /*
2070 * We don't want to do block allocation, so redirty
2071 * the page and return. We may reach here when we do
2072 * a journal commit via journal_submit_inode_data_buffers.
2073 * We can also reach here via shrink_page_list but it
2074 * should never be for direct reclaim so warn if that
2075 * happens
2076 */
2077 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
2078 PF_MEMALLOC);
2079 goto redirty_page;
2080 } 2100 }
2081 if (commit_write)
2082 /* now mark the buffer_heads as dirty and uptodate */
2083 block_commit_write(page, 0, len);
2084 2101
2085 if (PageChecked(page) && ext4_should_journal_data(inode)) 2102 if (PageChecked(page) && ext4_should_journal_data(inode))
2086 /* 2103 /*
@@ -2089,14 +2106,9 @@ static int ext4_writepage(struct page *page,
2089 */ 2106 */
2090 return __ext4_journalled_writepage(page, len); 2107 return __ext4_journalled_writepage(page, len);
2091 2108
2092 if (buffer_uninit(page_bufs)) { 2109 memset(&io_submit, 0, sizeof(io_submit));
2093 ext4_set_bh_endio(page_bufs, inode); 2110 ret = ext4_bio_write_page(&io_submit, page, len, wbc);
2094 ret = block_write_full_page_endio(page, noalloc_get_block_write, 2111 ext4_io_submit(&io_submit);
2095 wbc, ext4_end_io_buffer_write);
2096 } else
2097 ret = block_write_full_page(page, noalloc_get_block_write,
2098 wbc);
2099
2100 return ret; 2112 return ret;
2101} 2113}
2102 2114
@@ -2228,51 +2240,38 @@ static int write_cache_pages_da(handle_t *handle,
2228 logical = (sector_t) page->index << 2240 logical = (sector_t) page->index <<
2229 (PAGE_CACHE_SHIFT - inode->i_blkbits); 2241 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2230 2242
2231 if (!page_has_buffers(page)) { 2243 /* Add all dirty buffers to mpd */
2232 mpage_add_bh_to_extent(mpd, logical, 2244 head = page_buffers(page);
2233 PAGE_CACHE_SIZE, 2245 bh = head;
2234 (1 << BH_Dirty) | (1 << BH_Uptodate)); 2246 do {
2235 if (mpd->io_done) 2247 BUG_ON(buffer_locked(bh));
2236 goto ret_extent_tail;
2237 } else {
2238 /* 2248 /*
2239 * Page with regular buffer heads, 2249 * We need to try to allocate unmapped blocks
2240 * just add all dirty ones 2250 * in the same page. Otherwise we won't make
2251 * progress with the page in ext4_writepage
2241 */ 2252 */
2242 head = page_buffers(page); 2253 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2243 bh = head; 2254 mpage_add_bh_to_extent(mpd, logical,
2244 do { 2255 bh->b_state);
2245 BUG_ON(buffer_locked(bh)); 2256 if (mpd->io_done)
2257 goto ret_extent_tail;
2258 } else if (buffer_dirty(bh) &&
2259 buffer_mapped(bh)) {
2246 /* 2260 /*
2247 * We need to try to allocate 2261 * mapped dirty buffer. We need to
2248 * unmapped blocks in the same page. 2262 * update the b_state because we look
2249 * Otherwise we won't make progress 2263 * at b_state in mpage_da_map_blocks.
2250 * with the page in ext4_writepage 2264 * We don't update b_size because if we
2265 * find an unmapped buffer_head later
2266 * we need to use the b_state flag of
2267 * that buffer_head.
2251 */ 2268 */
2252 if (ext4_bh_delay_or_unwritten(NULL, bh)) { 2269 if (mpd->b_size == 0)
2253 mpage_add_bh_to_extent(mpd, logical, 2270 mpd->b_state =
2254 bh->b_size, 2271 bh->b_state & BH_FLAGS;
2255 bh->b_state); 2272 }
2256 if (mpd->io_done) 2273 logical++;
2257 goto ret_extent_tail; 2274 } while ((bh = bh->b_this_page) != head);
2258 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2259 /*
2260 * mapped dirty buffer. We need
2261 * to update the b_state
2262 * because we look at b_state
2263 * in mpage_da_map_blocks. We
2264 * don't update b_size because
2265 * if we find an unmapped
2266 * buffer_head later we need to
2267 * use the b_state flag of that
2268 * buffer_head.
2269 */
2270 if (mpd->b_size == 0)
2271 mpd->b_state = bh->b_state & BH_FLAGS;
2272 }
2273 logical++;
2274 } while ((bh = bh->b_this_page) != head);
2275 }
2276 2275
2277 if (nr_to_write > 0) { 2276 if (nr_to_write > 0) {
2278 nr_to_write--; 2277 nr_to_write--;
@@ -2413,7 +2412,8 @@ retry:
2413 needed_blocks = ext4_da_writepages_trans_blocks(inode); 2412 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2414 2413
2415 /* start a new transaction*/ 2414 /* start a new transaction*/
2416 handle = ext4_journal_start(inode, needed_blocks); 2415 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
2416 needed_blocks);
2417 if (IS_ERR(handle)) { 2417 if (IS_ERR(handle)) {
2418 ret = PTR_ERR(handle); 2418 ret = PTR_ERR(handle);
2419 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 2419 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb)
2512 /* 2512 /*
2513 * Start pushing delalloc when 1/2 of free blocks are dirty. 2513 * Start pushing delalloc when 1/2 of free blocks are dirty.
2514 */ 2514 */
2515 if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && 2515 if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
2516 !writeback_in_progress(sb->s_bdi) && 2516 try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2517 down_read_trylock(&sb->s_umount)) {
2518 writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2519 up_read(&sb->s_umount);
2520 }
2521 2517
2522 if (2 * free_blocks < 3 * dirty_blocks || 2518 if (2 * free_blocks < 3 * dirty_blocks ||
2523 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { 2519 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
@@ -2555,42 +2551,52 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2555 pos, len, flags, 2551 pos, len, flags,
2556 pagep, fsdata); 2552 pagep, fsdata);
2557 if (ret < 0) 2553 if (ret < 0)
2558 goto out; 2554 return ret;
2559 if (ret == 1) { 2555 if (ret == 1)
2560 ret = 0; 2556 return 0;
2561 goto out;
2562 }
2563 } 2557 }
2564 2558
2565retry: 2559 /*
2560 * grab_cache_page_write_begin() can take a long time if the
2561 * system is thrashing due to memory pressure, or if the page
2562 * is being written back. So grab it first before we start
2563 * the transaction handle. This also allows us to allocate
2564 * the page (if needed) without using GFP_NOFS.
2565 */
2566retry_grab:
2567 page = grab_cache_page_write_begin(mapping, index, flags);
2568 if (!page)
2569 return -ENOMEM;
2570 unlock_page(page);
2571
2566 /* 2572 /*
2567 * With delayed allocation, we don't log the i_disksize update 2573 * With delayed allocation, we don't log the i_disksize update
2568 * if there is delayed block allocation. But we still need 2574 * if there is delayed block allocation. But we still need
2569 * to journalling the i_disksize update if writes to the end 2575 * to journalling the i_disksize update if writes to the end
2570 * of file which has an already mapped buffer. 2576 * of file which has an already mapped buffer.
2571 */ 2577 */
2572 handle = ext4_journal_start(inode, 1); 2578retry_journal:
2579 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
2573 if (IS_ERR(handle)) { 2580 if (IS_ERR(handle)) {
2574 ret = PTR_ERR(handle); 2581 page_cache_release(page);
2575 goto out; 2582 return PTR_ERR(handle);
2576 } 2583 }
2577 /* We cannot recurse into the filesystem as the transaction is already
2578 * started */
2579 flags |= AOP_FLAG_NOFS;
2580 2584
2581 page = grab_cache_page_write_begin(mapping, index, flags); 2585 lock_page(page);
2582 if (!page) { 2586 if (page->mapping != mapping) {
2587 /* The page got truncated from under us */
2588 unlock_page(page);
2589 page_cache_release(page);
2583 ext4_journal_stop(handle); 2590 ext4_journal_stop(handle);
2584 ret = -ENOMEM; 2591 goto retry_grab;
2585 goto out;
2586 } 2592 }
2587 *pagep = page; 2593 /* In case writeback began while the page was unlocked */
2594 wait_on_page_writeback(page);
2588 2595
2589 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2596 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2590 if (ret < 0) { 2597 if (ret < 0) {
2591 unlock_page(page); 2598 unlock_page(page);
2592 ext4_journal_stop(handle); 2599 ext4_journal_stop(handle);
2593 page_cache_release(page);
2594 /* 2600 /*
2595 * block_write_begin may have instantiated a few blocks 2601 * block_write_begin may have instantiated a few blocks
2596 * outside i_size. Trim these off again. Don't need 2602 * outside i_size. Trim these off again. Don't need
@@ -2598,11 +2604,16 @@ retry:
2598 */ 2604 */
2599 if (pos + len > inode->i_size) 2605 if (pos + len > inode->i_size)
2600 ext4_truncate_failed_write(inode); 2606 ext4_truncate_failed_write(inode);
2607
2608 if (ret == -ENOSPC &&
2609 ext4_should_retry_alloc(inode->i_sb, &retries))
2610 goto retry_journal;
2611
2612 page_cache_release(page);
2613 return ret;
2601 } 2614 }
2602 2615
2603 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2616 *pagep = page;
2604 goto retry;
2605out:
2606 return ret; 2617 return ret;
2607} 2618}
2608 2619
@@ -2858,36 +2869,10 @@ ext4_readpages(struct file *file, struct address_space *mapping,
2858 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 2869 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
2859} 2870}
2860 2871
2861static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
2862{
2863 struct buffer_head *head, *bh;
2864 unsigned int curr_off = 0;
2865
2866 if (!page_has_buffers(page))
2867 return;
2868 head = bh = page_buffers(page);
2869 do {
2870 if (offset <= curr_off && test_clear_buffer_uninit(bh)
2871 && bh->b_private) {
2872 ext4_free_io_end(bh->b_private);
2873 bh->b_private = NULL;
2874 bh->b_end_io = NULL;
2875 }
2876 curr_off = curr_off + bh->b_size;
2877 bh = bh->b_this_page;
2878 } while (bh != head);
2879}
2880
2881static void ext4_invalidatepage(struct page *page, unsigned long offset) 2872static void ext4_invalidatepage(struct page *page, unsigned long offset)
2882{ 2873{
2883 trace_ext4_invalidatepage(page, offset); 2874 trace_ext4_invalidatepage(page, offset);
2884 2875
2885 /*
2886 * free any io_end structure allocated for buffers to be discarded
2887 */
2888 if (ext4_should_dioread_nolock(page->mapping->host))
2889 ext4_invalidatepage_free_endio(page, offset);
2890
2891 /* No journalling happens on data buffers when this function is used */ 2876 /* No journalling happens on data buffers when this function is used */
2892 WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); 2877 WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
2893 2878
@@ -2959,7 +2944,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2959 ssize_t size, void *private, int ret, 2944 ssize_t size, void *private, int ret,
2960 bool is_async) 2945 bool is_async)
2961{ 2946{
2962 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2947 struct inode *inode = file_inode(iocb->ki_filp);
2963 ext4_io_end_t *io_end = iocb->private; 2948 ext4_io_end_t *io_end = iocb->private;
2964 2949
2965 /* if not async direct IO or dio with 0 bytes write, just return */ 2950 /* if not async direct IO or dio with 0 bytes write, just return */
@@ -2977,9 +2962,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2977 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2962 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
2978 ext4_free_io_end(io_end); 2963 ext4_free_io_end(io_end);
2979out: 2964out:
2965 inode_dio_done(inode);
2980 if (is_async) 2966 if (is_async)
2981 aio_complete(iocb, ret, 0); 2967 aio_complete(iocb, ret, 0);
2982 inode_dio_done(inode);
2983 return; 2968 return;
2984 } 2969 }
2985 2970
@@ -2993,65 +2978,6 @@ out:
2993 ext4_add_complete_io(io_end); 2978 ext4_add_complete_io(io_end);
2994} 2979}
2995 2980
2996static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2997{
2998 ext4_io_end_t *io_end = bh->b_private;
2999 struct inode *inode;
3000
3001 if (!test_clear_buffer_uninit(bh) || !io_end)
3002 goto out;
3003
3004 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
3005 ext4_msg(io_end->inode->i_sb, KERN_INFO,
3006 "sb umounted, discard end_io request for inode %lu",
3007 io_end->inode->i_ino);
3008 ext4_free_io_end(io_end);
3009 goto out;
3010 }
3011
3012 /*
3013 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
3014 * but being more careful is always safe for the future change.
3015 */
3016 inode = io_end->inode;
3017 ext4_set_io_unwritten_flag(inode, io_end);
3018 ext4_add_complete_io(io_end);
3019out:
3020 bh->b_private = NULL;
3021 bh->b_end_io = NULL;
3022 clear_buffer_uninit(bh);
3023 end_buffer_async_write(bh, uptodate);
3024}
3025
3026static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3027{
3028 ext4_io_end_t *io_end;
3029 struct page *page = bh->b_page;
3030 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
3031 size_t size = bh->b_size;
3032
3033retry:
3034 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3035 if (!io_end) {
3036 pr_warn_ratelimited("%s: allocation fail\n", __func__);
3037 schedule();
3038 goto retry;
3039 }
3040 io_end->offset = offset;
3041 io_end->size = size;
3042 /*
3043 * We need to hold a reference to the page to make sure it
3044 * doesn't get evicted before ext4_end_io_work() has a chance
3045 * to convert the extent from written to unwritten.
3046 */
3047 io_end->page = page;
3048 get_page(io_end->page);
3049
3050 bh->b_private = io_end;
3051 bh->b_end_io = ext4_end_io_buffer_write;
3052 return 0;
3053}
3054
3055/* 2981/*
3056 * For ext4 extent files, ext4 will do direct-io write to holes, 2982 * For ext4 extent files, ext4 will do direct-io write to holes,
3057 * preallocated extents, and those write extend the file, no need to 2983 * preallocated extents, and those write extend the file, no need to
@@ -3553,20 +3479,20 @@ int ext4_can_truncate(struct inode *inode)
3553 3479
3554int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 3480int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3555{ 3481{
3556 struct inode *inode = file->f_path.dentry->d_inode; 3482 struct inode *inode = file_inode(file);
3557 if (!S_ISREG(inode->i_mode)) 3483 if (!S_ISREG(inode->i_mode))
3558 return -EOPNOTSUPP; 3484 return -EOPNOTSUPP;
3559 3485
3560 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 3486 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3561 /* TODO: Add support for non extent hole punching */ 3487 return ext4_ind_punch_hole(file, offset, length);
3562 return -EOPNOTSUPP;
3563 }
3564 3488
3565 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { 3489 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3566 /* TODO: Add support for bigalloc file systems */ 3490 /* TODO: Add support for bigalloc file systems */
3567 return -EOPNOTSUPP; 3491 return -EOPNOTSUPP;
3568 } 3492 }
3569 3493
3494 trace_ext4_punch_hole(inode, offset, length);
3495
3570 return ext4_ext_punch_hole(file, offset, length); 3496 return ext4_ext_punch_hole(file, offset, length);
3571} 3497}
3572 3498
@@ -3660,11 +3586,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
3660 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); 3586 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3661 3587
3662 bh = sb_getblk(sb, block); 3588 bh = sb_getblk(sb, block);
3663 if (!bh) { 3589 if (unlikely(!bh))
3664 EXT4_ERROR_INODE_BLOCK(inode, block, 3590 return -ENOMEM;
3665 "unable to read itable block");
3666 return -EIO;
3667 }
3668 if (!buffer_uptodate(bh)) { 3591 if (!buffer_uptodate(bh)) {
3669 lock_buffer(bh); 3592 lock_buffer(bh);
3670 3593
@@ -3696,7 +3619,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
3696 3619
3697 /* Is the inode bitmap in cache? */ 3620 /* Is the inode bitmap in cache? */
3698 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); 3621 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3699 if (!bitmap_bh) 3622 if (unlikely(!bitmap_bh))
3700 goto make_io; 3623 goto make_io;
3701 3624
3702 /* 3625 /*
@@ -4404,8 +4327,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4404 4327
4405 /* (user+group)*(old+new) structure, inode write (sb, 4328 /* (user+group)*(old+new) structure, inode write (sb,
4406 * inode block, ? - but truncate inode update has it) */ 4329 * inode block, ? - but truncate inode update has it) */
4407 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ 4330 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4408 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3); 4331 (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
4332 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
4409 if (IS_ERR(handle)) { 4333 if (IS_ERR(handle)) {
4410 error = PTR_ERR(handle); 4334 error = PTR_ERR(handle);
4411 goto err_out; 4335 goto err_out;
@@ -4440,7 +4364,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4440 (attr->ia_size < inode->i_size)) { 4364 (attr->ia_size < inode->i_size)) {
4441 handle_t *handle; 4365 handle_t *handle;
4442 4366
4443 handle = ext4_journal_start(inode, 3); 4367 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4444 if (IS_ERR(handle)) { 4368 if (IS_ERR(handle)) {
4445 error = PTR_ERR(handle); 4369 error = PTR_ERR(handle);
4446 goto err_out; 4370 goto err_out;
@@ -4460,7 +4384,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4460 attr->ia_size); 4384 attr->ia_size);
4461 if (error) { 4385 if (error) {
4462 /* Do as much error cleanup as possible */ 4386 /* Do as much error cleanup as possible */
4463 handle = ext4_journal_start(inode, 3); 4387 handle = ext4_journal_start(inode,
4388 EXT4_HT_INODE, 3);
4464 if (IS_ERR(handle)) { 4389 if (IS_ERR(handle)) {
4465 ext4_orphan_del(NULL, inode); 4390 ext4_orphan_del(NULL, inode);
4466 goto err_out; 4391 goto err_out;
@@ -4801,7 +4726,7 @@ void ext4_dirty_inode(struct inode *inode, int flags)
4801{ 4726{
4802 handle_t *handle; 4727 handle_t *handle;
4803 4728
4804 handle = ext4_journal_start(inode, 2); 4729 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4805 if (IS_ERR(handle)) 4730 if (IS_ERR(handle))
4806 goto out; 4731 goto out;
4807 4732
@@ -4902,7 +4827,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4902 4827
4903 /* Finally we can mark the inode as dirty. */ 4828 /* Finally we can mark the inode as dirty. */
4904 4829
4905 handle = ext4_journal_start(inode, 1); 4830 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
4906 if (IS_ERR(handle)) 4831 if (IS_ERR(handle))
4907 return PTR_ERR(handle); 4832 return PTR_ERR(handle);
4908 4833
@@ -4926,7 +4851,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4926 unsigned long len; 4851 unsigned long len;
4927 int ret; 4852 int ret;
4928 struct file *file = vma->vm_file; 4853 struct file *file = vma->vm_file;
4929 struct inode *inode = file->f_path.dentry->d_inode; 4854 struct inode *inode = file_inode(file);
4930 struct address_space *mapping = inode->i_mapping; 4855 struct address_space *mapping = inode->i_mapping;
4931 handle_t *handle; 4856 handle_t *handle;
4932 get_block_t *get_block; 4857 get_block_t *get_block;
@@ -4968,7 +4893,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4968 0, len, NULL, 4893 0, len, NULL,
4969 ext4_bh_unmapped)) { 4894 ext4_bh_unmapped)) {
4970 /* Wait so that we don't change page under IO */ 4895 /* Wait so that we don't change page under IO */
4971 wait_on_page_writeback(page); 4896 wait_for_stable_page(page);
4972 ret = VM_FAULT_LOCKED; 4897 ret = VM_FAULT_LOCKED;
4973 goto out; 4898 goto out;
4974 } 4899 }
@@ -4980,7 +4905,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4980 else 4905 else
4981 get_block = ext4_get_block; 4906 get_block = ext4_get_block;
4982retry_alloc: 4907retry_alloc:
4983 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 4908 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
4909 ext4_writepage_trans_blocks(inode));
4984 if (IS_ERR(handle)) { 4910 if (IS_ERR(handle)) {
4985 ret = VM_FAULT_SIGBUS; 4911 ret = VM_FAULT_SIGBUS;
4986 goto out; 4912 goto out;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5747f52f7c72..721f4d33e148 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -22,7 +22,7 @@
22 22
23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
24{ 24{
25 struct inode *inode = filp->f_dentry->d_inode; 25 struct inode *inode = file_inode(filp);
26 struct super_block *sb = inode->i_sb; 26 struct super_block *sb = inode->i_sb;
27 struct ext4_inode_info *ei = EXT4_I(inode); 27 struct ext4_inode_info *ei = EXT4_I(inode);
28 unsigned int flags; 28 unsigned int flags;
@@ -104,7 +104,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
104 } else if (oldflags & EXT4_EOFBLOCKS_FL) 104 } else if (oldflags & EXT4_EOFBLOCKS_FL)
105 ext4_truncate(inode); 105 ext4_truncate(inode);
106 106
107 handle = ext4_journal_start(inode, 1); 107 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
108 if (IS_ERR(handle)) { 108 if (IS_ERR(handle)) {
109 err = PTR_ERR(handle); 109 err = PTR_ERR(handle);
110 goto flags_out; 110 goto flags_out;
@@ -173,7 +173,7 @@ flags_out:
173 } 173 }
174 174
175 mutex_lock(&inode->i_mutex); 175 mutex_lock(&inode->i_mutex);
176 handle = ext4_journal_start(inode, 1); 176 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
177 if (IS_ERR(handle)) { 177 if (IS_ERR(handle)) {
178 err = PTR_ERR(handle); 178 err = PTR_ERR(handle);
179 goto unlock_out; 179 goto unlock_out;
@@ -313,6 +313,9 @@ mext_out:
313 if (err == 0) 313 if (err == 0)
314 err = err2; 314 err = err2;
315 mnt_drop_write_file(filp); 315 mnt_drop_write_file(filp);
316 if (!err && ext4_has_group_desc_csum(sb) &&
317 test_opt(sb, INIT_INODE_TABLE))
318 err = ext4_register_li_request(sb, input.group);
316group_add_out: 319group_add_out:
317 ext4_resize_end(sb); 320 ext4_resize_end(sb);
318 return err; 321 return err;
@@ -358,6 +361,7 @@ group_add_out:
358 ext4_fsblk_t n_blocks_count; 361 ext4_fsblk_t n_blocks_count;
359 struct super_block *sb = inode->i_sb; 362 struct super_block *sb = inode->i_sb;
360 int err = 0, err2 = 0; 363 int err = 0, err2 = 0;
364 ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
361 365
362 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 366 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
363 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 367 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
@@ -388,6 +392,11 @@ group_add_out:
388 if (err == 0) 392 if (err == 0)
389 err = err2; 393 err = err2;
390 mnt_drop_write_file(filp); 394 mnt_drop_write_file(filp);
395 if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
396 ext4_has_group_desc_csum(sb) &&
397 test_opt(sb, INIT_INODE_TABLE))
398 err = ext4_register_li_request(sb, o_group);
399
391resizefs_out: 400resizefs_out:
392 ext4_resize_end(sb); 401 ext4_resize_end(sb);
393 return err; 402 return err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1bf6fe785c4f..6540ebe058e3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -23,11 +23,18 @@
23 23
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "mballoc.h" 25#include "mballoc.h"
26#include <linux/debugfs.h>
27#include <linux/log2.h> 26#include <linux/log2.h>
27#include <linux/module.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <trace/events/ext4.h> 29#include <trace/events/ext4.h>
30 30
31#ifdef CONFIG_EXT4_DEBUG
32ushort ext4_mballoc_debug __read_mostly;
33
34module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
35MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
36#endif
37
31/* 38/*
32 * MUSTDO: 39 * MUSTDO:
33 * - test ext4_ext_search_left() and ext4_ext_search_right() 40 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -1884,15 +1891,19 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1884 case 0: 1891 case 0:
1885 BUG_ON(ac->ac_2order == 0); 1892 BUG_ON(ac->ac_2order == 0);
1886 1893
1887 if (grp->bb_largest_free_order < ac->ac_2order)
1888 return 0;
1889
1890 /* Avoid using the first bg of a flexgroup for data files */ 1894 /* Avoid using the first bg of a flexgroup for data files */
1891 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1895 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1892 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && 1896 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1893 ((group % flex_size) == 0)) 1897 ((group % flex_size) == 0))
1894 return 0; 1898 return 0;
1895 1899
1900 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
1901 (free / fragments) >= ac->ac_g_ex.fe_len)
1902 return 1;
1903
1904 if (grp->bb_largest_free_order < ac->ac_2order)
1905 return 0;
1906
1896 return 1; 1907 return 1;
1897 case 1: 1908 case 1:
1898 if ((free / fragments) >= ac->ac_g_ex.fe_len) 1909 if ((free / fragments) >= ac->ac_g_ex.fe_len)
@@ -2007,7 +2018,7 @@ repeat:
2007 } 2018 }
2008 2019
2009 ac->ac_groups_scanned++; 2020 ac->ac_groups_scanned++;
2010 if (cr == 0) 2021 if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
2011 ext4_mb_simple_scan_group(ac, &e4b); 2022 ext4_mb_simple_scan_group(ac, &e4b);
2012 else if (cr == 1 && sbi->s_stripe && 2023 else if (cr == 1 && sbi->s_stripe &&
2013 !(ac->ac_g_ex.fe_len % sbi->s_stripe)) 2024 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
@@ -2656,40 +2667,6 @@ static void ext4_free_data_callback(struct super_block *sb,
2656 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2667 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2657} 2668}
2658 2669
2659#ifdef CONFIG_EXT4_DEBUG
2660u8 mb_enable_debug __read_mostly;
2661
2662static struct dentry *debugfs_dir;
2663static struct dentry *debugfs_debug;
2664
2665static void __init ext4_create_debugfs_entry(void)
2666{
2667 debugfs_dir = debugfs_create_dir("ext4", NULL);
2668 if (debugfs_dir)
2669 debugfs_debug = debugfs_create_u8("mballoc-debug",
2670 S_IRUGO | S_IWUSR,
2671 debugfs_dir,
2672 &mb_enable_debug);
2673}
2674
2675static void ext4_remove_debugfs_entry(void)
2676{
2677 debugfs_remove(debugfs_debug);
2678 debugfs_remove(debugfs_dir);
2679}
2680
2681#else
2682
2683static void __init ext4_create_debugfs_entry(void)
2684{
2685}
2686
2687static void ext4_remove_debugfs_entry(void)
2688{
2689}
2690
2691#endif
2692
2693int __init ext4_init_mballoc(void) 2670int __init ext4_init_mballoc(void)
2694{ 2671{
2695 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, 2672 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
@@ -2711,7 +2688,6 @@ int __init ext4_init_mballoc(void)
2711 kmem_cache_destroy(ext4_ac_cachep); 2688 kmem_cache_destroy(ext4_ac_cachep);
2712 return -ENOMEM; 2689 return -ENOMEM;
2713 } 2690 }
2714 ext4_create_debugfs_entry();
2715 return 0; 2691 return 0;
2716} 2692}
2717 2693
@@ -2726,7 +2702,6 @@ void ext4_exit_mballoc(void)
2726 kmem_cache_destroy(ext4_ac_cachep); 2702 kmem_cache_destroy(ext4_ac_cachep);
2727 kmem_cache_destroy(ext4_free_data_cachep); 2703 kmem_cache_destroy(ext4_free_data_cachep);
2728 ext4_groupinfo_destroy_slabs(); 2704 ext4_groupinfo_destroy_slabs();
2729 ext4_remove_debugfs_entry();
2730} 2705}
2731 2706
2732 2707
@@ -3872,7 +3847,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3872 struct super_block *sb = ac->ac_sb; 3847 struct super_block *sb = ac->ac_sb;
3873 ext4_group_t ngroups, i; 3848 ext4_group_t ngroups, i;
3874 3849
3875 if (!mb_enable_debug || 3850 if (!ext4_mballoc_debug ||
3876 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) 3851 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3877 return; 3852 return;
3878 3853
@@ -4005,8 +3980,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4005 len = ar->len; 3980 len = ar->len;
4006 3981
4007 /* just a dirty hack to filter too big requests */ 3982 /* just a dirty hack to filter too big requests */
4008 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10) 3983 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4009 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10; 3984 len = EXT4_CLUSTERS_PER_GROUP(sb);
4010 3985
4011 /* start searching from the goal */ 3986 /* start searching from the goal */
4012 goal = ar->goal; 3987 goal = ar->goal;
@@ -4136,7 +4111,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4136 /* The max size of hash table is PREALLOC_TB_SIZE */ 4111 /* The max size of hash table is PREALLOC_TB_SIZE */
4137 order = PREALLOC_TB_SIZE - 1; 4112 order = PREALLOC_TB_SIZE - 1;
4138 /* Add the prealloc space to lg */ 4113 /* Add the prealloc space to lg */
4139 rcu_read_lock(); 4114 spin_lock(&lg->lg_prealloc_lock);
4140 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], 4115 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4141 pa_inode_list) { 4116 pa_inode_list) {
4142 spin_lock(&tmp_pa->pa_lock); 4117 spin_lock(&tmp_pa->pa_lock);
@@ -4160,12 +4135,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4160 if (!added) 4135 if (!added)
4161 list_add_tail_rcu(&pa->pa_inode_list, 4136 list_add_tail_rcu(&pa->pa_inode_list,
4162 &lg->lg_prealloc_list[order]); 4137 &lg->lg_prealloc_list[order]);
4163 rcu_read_unlock(); 4138 spin_unlock(&lg->lg_prealloc_lock);
4164 4139
4165 /* Now trim the list to be not more than 8 elements */ 4140 /* Now trim the list to be not more than 8 elements */
4166 if (lg_prealloc_count > 8) { 4141 if (lg_prealloc_count > 8) {
4167 ext4_mb_discard_lg_preallocations(sb, lg, 4142 ext4_mb_discard_lg_preallocations(sb, lg,
4168 order, lg_prealloc_count); 4143 order, lg_prealloc_count);
4169 return; 4144 return;
4170 } 4145 }
4171 return ; 4146 return ;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 3ccd889ba953..08481ee84cd5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -37,11 +37,11 @@
37/* 37/*
38 */ 38 */
39#ifdef CONFIG_EXT4_DEBUG 39#ifdef CONFIG_EXT4_DEBUG
40extern u8 mb_enable_debug; 40extern ushort ext4_mballoc_debug;
41 41
42#define mb_debug(n, fmt, a...) \ 42#define mb_debug(n, fmt, a...) \
43 do { \ 43 do { \
44 if ((n) <= mb_enable_debug) { \ 44 if ((n) <= ext4_mballoc_debug) { \
45 printk(KERN_DEBUG "(%s, %d): %s: ", \ 45 printk(KERN_DEBUG "(%s, %d): %s: ", \
46 __FILE__, __LINE__, __func__); \ 46 __FILE__, __LINE__, __func__); \
47 printk(fmt, ## a); \ 47 printk(fmt, ## a); \
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index db8226d595fa..480acf4a085f 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -456,11 +456,14 @@ int ext4_ext_migrate(struct inode *inode)
456 */ 456 */
457 return retval; 457 return retval;
458 458
459 handle = ext4_journal_start(inode, 459 /*
460 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 460 * Worst case we can touch the allocation bitmaps, a bgd
461 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 461 * block, and a block to link in the orphan list. We do need
462 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) 462 * need to worry about credits for modifying the quota inode.
463 + 1); 463 */
464 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
465 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
466
464 if (IS_ERR(handle)) { 467 if (IS_ERR(handle)) {
465 retval = PTR_ERR(handle); 468 retval = PTR_ERR(handle);
466 return retval; 469 return retval;
@@ -507,7 +510,7 @@ int ext4_ext_migrate(struct inode *inode)
507 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 510 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
508 up_read((&EXT4_I(inode)->i_data_sem)); 511 up_read((&EXT4_I(inode)->i_data_sem));
509 512
510 handle = ext4_journal_start(inode, 1); 513 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
511 if (IS_ERR(handle)) { 514 if (IS_ERR(handle)) {
512 /* 515 /*
513 * It is impossible to update on-disk structures without 516 * It is impossible to update on-disk structures without
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index fe7c63f4717e..f9b551561d2c 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -80,6 +80,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
80 * is not blocked in the elevator. */ 80 * is not blocked in the elevator. */
81 if (!*bh) 81 if (!*bh)
82 *bh = sb_getblk(sb, mmp_block); 82 *bh = sb_getblk(sb, mmp_block);
83 if (!*bh)
84 return -ENOMEM;
83 if (*bh) { 85 if (*bh) {
84 get_bh(*bh); 86 get_bh(*bh);
85 lock_buffer(*bh); 87 lock_buffer(*bh);
@@ -91,7 +93,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
91 *bh = NULL; 93 *bh = NULL;
92 } 94 }
93 } 95 }
94 if (!*bh) { 96 if (unlikely(!*bh)) {
95 ext4_warning(sb, "Error while reading MMP block %llu", 97 ext4_warning(sb, "Error while reading MMP block %llu",
96 mmp_block); 98 mmp_block);
97 return -EIO; 99 return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index d9cc5ee42f53..4e81d47aa8cb 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -681,6 +681,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
681 681
682 depth = ext_depth(donor_inode); 682 depth = ext_depth(donor_inode);
683 dext = donor_path[depth].p_ext; 683 dext = donor_path[depth].p_ext;
684 if (unlikely(!dext))
685 goto missing_donor_extent;
684 tmp_dext = *dext; 686 tmp_dext = *dext;
685 687
686 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 688 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
@@ -691,7 +693,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
691 /* Loop for the donor extents */ 693 /* Loop for the donor extents */
692 while (1) { 694 while (1) {
693 /* The extent for donor must be found. */ 695 /* The extent for donor must be found. */
694 if (!dext) { 696 if (unlikely(!dext)) {
697 missing_donor_extent:
695 EXT4_ERROR_INODE(donor_inode, 698 EXT4_ERROR_INODE(donor_inode,
696 "The extent for donor must be found"); 699 "The extent for donor must be found");
697 *err = -EIO; 700 *err = -EIO;
@@ -761,9 +764,6 @@ out:
761 kfree(donor_path); 764 kfree(donor_path);
762 } 765 }
763 766
764 ext4_ext_invalidate_cache(orig_inode);
765 ext4_ext_invalidate_cache(donor_inode);
766
767 return replaced_count; 767 return replaced_count;
768} 768}
769 769
@@ -900,7 +900,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
900 pgoff_t orig_page_offset, int data_offset_in_page, 900 pgoff_t orig_page_offset, int data_offset_in_page,
901 int block_len_in_page, int uninit, int *err) 901 int block_len_in_page, int uninit, int *err)
902{ 902{
903 struct inode *orig_inode = o_filp->f_dentry->d_inode; 903 struct inode *orig_inode = file_inode(o_filp);
904 struct page *pagep[2] = {NULL, NULL}; 904 struct page *pagep[2] = {NULL, NULL};
905 handle_t *handle; 905 handle_t *handle;
906 ext4_lblk_t orig_blk_offset; 906 ext4_lblk_t orig_blk_offset;
@@ -920,7 +920,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
920again: 920again:
921 *err = 0; 921 *err = 0;
922 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 922 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
923 handle = ext4_journal_start(orig_inode, jblocks); 923 handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
924 if (IS_ERR(handle)) { 924 if (IS_ERR(handle)) {
925 *err = PTR_ERR(handle); 925 *err = PTR_ERR(handle);
926 return 0; 926 return 0;
@@ -1279,8 +1279,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1279 __u64 orig_start, __u64 donor_start, __u64 len, 1279 __u64 orig_start, __u64 donor_start, __u64 len,
1280 __u64 *moved_len) 1280 __u64 *moved_len)
1281{ 1281{
1282 struct inode *orig_inode = o_filp->f_dentry->d_inode; 1282 struct inode *orig_inode = file_inode(o_filp);
1283 struct inode *donor_inode = d_filp->f_dentry->d_inode; 1283 struct inode *donor_inode = file_inode(d_filp);
1284 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; 1284 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1285 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; 1285 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1286 ext4_lblk_t block_start = orig_start; 1286 ext4_lblk_t block_start = orig_start;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8990165346ee..3825d6aa8336 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -47,38 +47,111 @@
47#define NAMEI_RA_CHUNKS 2 47#define NAMEI_RA_CHUNKS 2
48#define NAMEI_RA_BLOCKS 4 48#define NAMEI_RA_BLOCKS 4
49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
51 50
52static struct buffer_head *ext4_append(handle_t *handle, 51static struct buffer_head *ext4_append(handle_t *handle,
53 struct inode *inode, 52 struct inode *inode,
54 ext4_lblk_t *block, int *err) 53 ext4_lblk_t *block)
55{ 54{
56 struct buffer_head *bh; 55 struct buffer_head *bh;
56 int err = 0;
57 57
58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && 58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 ((inode->i_size >> 10) >= 59 ((inode->i_size >> 10) >=
60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) { 60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
61 *err = -ENOSPC; 61 return ERR_PTR(-ENOSPC);
62 return NULL;
63 }
64 62
65 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 63 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
66 64
67 bh = ext4_bread(handle, inode, *block, 1, err); 65 bh = ext4_bread(handle, inode, *block, 1, &err);
68 if (bh) { 66 if (!bh)
69 inode->i_size += inode->i_sb->s_blocksize; 67 return ERR_PTR(err);
70 EXT4_I(inode)->i_disksize = inode->i_size; 68 inode->i_size += inode->i_sb->s_blocksize;
71 *err = ext4_journal_get_write_access(handle, bh); 69 EXT4_I(inode)->i_disksize = inode->i_size;
72 if (*err) { 70 err = ext4_journal_get_write_access(handle, bh);
71 if (err) {
72 brelse(bh);
73 ext4_std_error(inode->i_sb, err);
74 return ERR_PTR(err);
75 }
76 return bh;
77}
78
79static int ext4_dx_csum_verify(struct inode *inode,
80 struct ext4_dir_entry *dirent);
81
82typedef enum {
83 EITHER, INDEX, DIRENT
84} dirblock_type_t;
85
86#define ext4_read_dirblock(inode, block, type) \
87 __ext4_read_dirblock((inode), (block), (type), __LINE__)
88
89static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
90 ext4_lblk_t block,
91 dirblock_type_t type,
92 unsigned int line)
93{
94 struct buffer_head *bh;
95 struct ext4_dir_entry *dirent;
96 int err = 0, is_dx_block = 0;
97
98 bh = ext4_bread(NULL, inode, block, 0, &err);
99 if (!bh) {
100 if (err == 0) {
101 ext4_error_inode(inode, __func__, line, block,
102 "Directory hole found");
103 return ERR_PTR(-EIO);
104 }
105 __ext4_warning(inode->i_sb, __func__, line,
106 "error reading directory block "
107 "(ino %lu, block %lu)", inode->i_ino,
108 (unsigned long) block);
109 return ERR_PTR(err);
110 }
111 dirent = (struct ext4_dir_entry *) bh->b_data;
112 /* Determine whether or not we have an index block */
113 if (is_dx(inode)) {
114 if (block == 0)
115 is_dx_block = 1;
116 else if (ext4_rec_len_from_disk(dirent->rec_len,
117 inode->i_sb->s_blocksize) ==
118 inode->i_sb->s_blocksize)
119 is_dx_block = 1;
120 }
121 if (!is_dx_block && type == INDEX) {
122 ext4_error_inode(inode, __func__, line, block,
123 "directory leaf block found instead of index block");
124 return ERR_PTR(-EIO);
125 }
126 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
127 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
128 buffer_verified(bh))
129 return bh;
130
131 /*
132 * An empty leaf block can get mistaken for a index block; for
133 * this reason, we can only check the index checksum when the
134 * caller is sure it should be an index block.
135 */
136 if (is_dx_block && type == INDEX) {
137 if (ext4_dx_csum_verify(inode, dirent))
138 set_buffer_verified(bh);
139 else {
140 ext4_error_inode(inode, __func__, line, block,
141 "Directory index failed checksum");
73 brelse(bh); 142 brelse(bh);
74 bh = NULL; 143 return ERR_PTR(-EIO);
75 } 144 }
76 } 145 }
77 if (!bh && !(*err)) { 146 if (!is_dx_block) {
78 *err = -EIO; 147 if (ext4_dirent_csum_verify(inode, dirent))
79 ext4_error(inode->i_sb, 148 set_buffer_verified(bh);
80 "Directory hole detected on inode %lu\n", 149 else {
81 inode->i_ino); 150 ext4_error_inode(inode, __func__, line, block,
151 "Directory block failed checksum");
152 brelse(bh);
153 return ERR_PTR(-EIO);
154 }
82 } 155 }
83 return bh; 156 return bh;
84} 157}
@@ -604,9 +677,9 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
604 u32 hash; 677 u32 hash;
605 678
606 frame->bh = NULL; 679 frame->bh = NULL;
607 if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) { 680 bh = ext4_read_dirblock(dir, 0, INDEX);
608 if (*err == 0) 681 if (IS_ERR(bh)) {
609 *err = ERR_BAD_DX_DIR; 682 *err = PTR_ERR(bh);
610 goto fail; 683 goto fail;
611 } 684 }
612 root = (struct dx_root *) bh->b_data; 685 root = (struct dx_root *) bh->b_data;
@@ -643,15 +716,6 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
643 goto fail; 716 goto fail;
644 } 717 }
645 718
646 if (!buffer_verified(bh) &&
647 !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
648 ext4_warning(dir->i_sb, "Root failed checksum");
649 brelse(bh);
650 *err = ERR_BAD_DX_DIR;
651 goto fail;
652 }
653 set_buffer_verified(bh);
654
655 entries = (struct dx_entry *) (((char *)&root->info) + 719 entries = (struct dx_entry *) (((char *)&root->info) +
656 root->info.info_length); 720 root->info.info_length);
657 721
@@ -709,22 +773,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
709 frame->entries = entries; 773 frame->entries = entries;
710 frame->at = at; 774 frame->at = at;
711 if (!indirect--) return frame; 775 if (!indirect--) return frame;
712 if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) { 776 bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
713 if (!(*err)) 777 if (IS_ERR(bh)) {
714 *err = ERR_BAD_DX_DIR; 778 *err = PTR_ERR(bh);
715 goto fail2; 779 goto fail2;
716 } 780 }
717 at = entries = ((struct dx_node *) bh->b_data)->entries; 781 entries = ((struct dx_node *) bh->b_data)->entries;
718
719 if (!buffer_verified(bh) &&
720 !ext4_dx_csum_verify(dir,
721 (struct ext4_dir_entry *)bh->b_data)) {
722 ext4_warning(dir->i_sb, "Node failed checksum");
723 brelse(bh);
724 *err = ERR_BAD_DX_DIR;
725 goto fail;
726 }
727 set_buffer_verified(bh);
728 782
729 if (dx_get_limit(entries) != dx_node_limit (dir)) { 783 if (dx_get_limit(entries) != dx_node_limit (dir)) {
730 ext4_warning(dir->i_sb, 784 ext4_warning(dir->i_sb,
@@ -783,7 +837,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
783{ 837{
784 struct dx_frame *p; 838 struct dx_frame *p;
785 struct buffer_head *bh; 839 struct buffer_head *bh;
786 int err, num_frames = 0; 840 int num_frames = 0;
787 __u32 bhash; 841 __u32 bhash;
788 842
789 p = frame; 843 p = frame;
@@ -822,25 +876,9 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
822 * block so no check is necessary 876 * block so no check is necessary
823 */ 877 */
824 while (num_frames--) { 878 while (num_frames--) {
825 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 879 bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
826 0, &err))) { 880 if (IS_ERR(bh))
827 if (!err) { 881 return PTR_ERR(bh);
828 ext4_error(dir->i_sb,
829 "Directory hole detected on inode %lu\n",
830 dir->i_ino);
831 return -EIO;
832 }
833 return err; /* Failure */
834 }
835
836 if (!buffer_verified(bh) &&
837 !ext4_dx_csum_verify(dir,
838 (struct ext4_dir_entry *)bh->b_data)) {
839 ext4_warning(dir->i_sb, "Node failed checksum");
840 return -EIO;
841 }
842 set_buffer_verified(bh);
843
844 p++; 882 p++;
845 brelse(p->bh); 883 brelse(p->bh);
846 p->bh = bh; 884 p->bh = bh;
@@ -866,20 +904,9 @@ static int htree_dirblock_to_tree(struct file *dir_file,
866 904
867 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", 905 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
868 (unsigned long)block)); 906 (unsigned long)block));
869 if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) { 907 bh = ext4_read_dirblock(dir, block, DIRENT);
870 if (!err) { 908 if (IS_ERR(bh))
871 err = -EIO; 909 return PTR_ERR(bh);
872 ext4_error(dir->i_sb,
873 "Directory hole detected on inode %lu\n",
874 dir->i_ino);
875 }
876 return err;
877 }
878
879 if (!buffer_verified(bh) &&
880 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
881 return -EIO;
882 set_buffer_verified(bh);
883 910
884 de = (struct ext4_dir_entry_2 *) bh->b_data; 911 de = (struct ext4_dir_entry_2 *) bh->b_data;
885 top = (struct ext4_dir_entry_2 *) ((char *) de + 912 top = (struct ext4_dir_entry_2 *) ((char *) de +
@@ -937,7 +964,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
937 964
938 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 965 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
939 start_hash, start_minor_hash)); 966 start_hash, start_minor_hash));
940 dir = dir_file->f_path.dentry->d_inode; 967 dir = file_inode(dir_file);
941 if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { 968 if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
942 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 969 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
943 if (hinfo.hash_version <= DX_HASH_TEA) 970 if (hinfo.hash_version <= DX_HASH_TEA)
@@ -1333,26 +1360,11 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1333 return NULL; 1360 return NULL;
1334 do { 1361 do {
1335 block = dx_get_block(frame->at); 1362 block = dx_get_block(frame->at);
1336 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) { 1363 bh = ext4_read_dirblock(dir, block, DIRENT);
1337 if (!(*err)) { 1364 if (IS_ERR(bh)) {
1338 *err = -EIO; 1365 *err = PTR_ERR(bh);
1339 ext4_error(dir->i_sb,
1340 "Directory hole detected on inode %lu\n",
1341 dir->i_ino);
1342 }
1343 goto errout;
1344 }
1345
1346 if (!buffer_verified(bh) &&
1347 !ext4_dirent_csum_verify(dir,
1348 (struct ext4_dir_entry *)bh->b_data)) {
1349 EXT4_ERROR_INODE(dir, "checksumming directory "
1350 "block %lu", (unsigned long)block);
1351 brelse(bh);
1352 *err = -EIO;
1353 goto errout; 1366 goto errout;
1354 } 1367 }
1355 set_buffer_verified(bh);
1356 retval = search_dirblock(bh, dir, d_name, 1368 retval = search_dirblock(bh, dir, d_name,
1357 block << EXT4_BLOCK_SIZE_BITS(sb), 1369 block << EXT4_BLOCK_SIZE_BITS(sb),
1358 res_dir); 1370 res_dir);
@@ -1536,11 +1548,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1536 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1548 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1537 csum_size = sizeof(struct ext4_dir_entry_tail); 1549 csum_size = sizeof(struct ext4_dir_entry_tail);
1538 1550
1539 bh2 = ext4_append (handle, dir, &newblock, &err); 1551 bh2 = ext4_append(handle, dir, &newblock);
1540 if (!(bh2)) { 1552 if (IS_ERR(bh2)) {
1541 brelse(*bh); 1553 brelse(*bh);
1542 *bh = NULL; 1554 *bh = NULL;
1543 goto errout; 1555 *error = PTR_ERR(bh2);
1556 return NULL;
1544 } 1557 }
1545 1558
1546 BUFFER_TRACE(*bh, "get_write_access"); 1559 BUFFER_TRACE(*bh, "get_write_access");
@@ -1621,7 +1634,6 @@ journal_error:
1621 brelse(bh2); 1634 brelse(bh2);
1622 *bh = NULL; 1635 *bh = NULL;
1623 ext4_std_error(dir->i_sb, err); 1636 ext4_std_error(dir->i_sb, err);
1624errout:
1625 *error = err; 1637 *error = err;
1626 return NULL; 1638 return NULL;
1627} 1639}
@@ -1699,7 +1711,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1699 const char *name = dentry->d_name.name; 1711 const char *name = dentry->d_name.name;
1700 int namelen = dentry->d_name.len; 1712 int namelen = dentry->d_name.len;
1701 unsigned int blocksize = dir->i_sb->s_blocksize; 1713 unsigned int blocksize = dir->i_sb->s_blocksize;
1702 unsigned short reclen;
1703 int csum_size = 0; 1714 int csum_size = 0;
1704 int err; 1715 int err;
1705 1716
@@ -1707,7 +1718,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1707 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1718 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1708 csum_size = sizeof(struct ext4_dir_entry_tail); 1719 csum_size = sizeof(struct ext4_dir_entry_tail);
1709 1720
1710 reclen = EXT4_DIR_REC_LEN(namelen);
1711 if (!de) { 1721 if (!de) {
1712 err = ext4_find_dest_de(dir, inode, 1722 err = ext4_find_dest_de(dir, inode,
1713 bh, bh->b_data, blocksize - csum_size, 1723 bh, bh->b_data, blocksize - csum_size,
@@ -1798,10 +1808,10 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1798 len = ((char *) root) + (blocksize - csum_size) - (char *) de; 1808 len = ((char *) root) + (blocksize - csum_size) - (char *) de;
1799 1809
1800 /* Allocate new block for the 0th block's dirents */ 1810 /* Allocate new block for the 0th block's dirents */
1801 bh2 = ext4_append(handle, dir, &block, &retval); 1811 bh2 = ext4_append(handle, dir, &block);
1802 if (!(bh2)) { 1812 if (IS_ERR(bh2)) {
1803 brelse(bh); 1813 brelse(bh);
1804 return retval; 1814 return PTR_ERR(bh2);
1805 } 1815 }
1806 ext4_set_inode_flag(dir, EXT4_INODE_INDEX); 1816 ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
1807 data1 = bh2->b_data; 1817 data1 = bh2->b_data;
@@ -1918,20 +1928,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1918 } 1928 }
1919 blocks = dir->i_size >> sb->s_blocksize_bits; 1929 blocks = dir->i_size >> sb->s_blocksize_bits;
1920 for (block = 0; block < blocks; block++) { 1930 for (block = 0; block < blocks; block++) {
1921 if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) { 1931 bh = ext4_read_dirblock(dir, block, DIRENT);
1922 if (!retval) { 1932 if (IS_ERR(bh))
1923 retval = -EIO; 1933 return PTR_ERR(bh);
1924 ext4_error(inode->i_sb, 1934
1925 "Directory hole detected on inode %lu\n",
1926 inode->i_ino);
1927 }
1928 return retval;
1929 }
1930 if (!buffer_verified(bh) &&
1931 !ext4_dirent_csum_verify(dir,
1932 (struct ext4_dir_entry *)bh->b_data))
1933 return -EIO;
1934 set_buffer_verified(bh);
1935 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1935 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1936 if (retval != -ENOSPC) { 1936 if (retval != -ENOSPC) {
1937 brelse(bh); 1937 brelse(bh);
@@ -1943,9 +1943,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1943 return make_indexed_dir(handle, dentry, inode, bh); 1943 return make_indexed_dir(handle, dentry, inode, bh);
1944 brelse(bh); 1944 brelse(bh);
1945 } 1945 }
1946 bh = ext4_append(handle, dir, &block, &retval); 1946 bh = ext4_append(handle, dir, &block);
1947 if (!bh) 1947 if (IS_ERR(bh))
1948 return retval; 1948 return PTR_ERR(bh);
1949 de = (struct ext4_dir_entry_2 *) bh->b_data; 1949 de = (struct ext4_dir_entry_2 *) bh->b_data;
1950 de->inode = 0; 1950 de->inode = 0;
1951 de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); 1951 de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
@@ -1982,22 +1982,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1982 return err; 1982 return err;
1983 entries = frame->entries; 1983 entries = frame->entries;
1984 at = frame->at; 1984 at = frame->at;
1985 1985 bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
1986 if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) { 1986 if (IS_ERR(bh)) {
1987 if (!err) { 1987 err = PTR_ERR(bh);
1988 err = -EIO; 1988 bh = NULL;
1989 ext4_error(dir->i_sb,
1990 "Directory hole detected on inode %lu\n",
1991 dir->i_ino);
1992 }
1993 goto cleanup; 1989 goto cleanup;
1994 } 1990 }
1995 1991
1996 if (!buffer_verified(bh) &&
1997 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
1998 goto journal_error;
1999 set_buffer_verified(bh);
2000
2001 BUFFER_TRACE(bh, "get_write_access"); 1992 BUFFER_TRACE(bh, "get_write_access");
2002 err = ext4_journal_get_write_access(handle, bh); 1993 err = ext4_journal_get_write_access(handle, bh);
2003 if (err) 1994 if (err)
@@ -2025,9 +2016,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2025 err = -ENOSPC; 2016 err = -ENOSPC;
2026 goto cleanup; 2017 goto cleanup;
2027 } 2018 }
2028 bh2 = ext4_append (handle, dir, &newblock, &err); 2019 bh2 = ext4_append(handle, dir, &newblock);
2029 if (!(bh2)) 2020 if (IS_ERR(bh2)) {
2021 err = PTR_ERR(bh2);
2030 goto cleanup; 2022 goto cleanup;
2023 }
2031 node2 = (struct dx_node *)(bh2->b_data); 2024 node2 = (struct dx_node *)(bh2->b_data);
2032 entries2 = node2->entries; 2025 entries2 = node2->entries;
2033 memset(&node2->fake, 0, sizeof(struct fake_dirent)); 2026 memset(&node2->fake, 0, sizeof(struct fake_dirent));
@@ -2106,8 +2099,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2106journal_error: 2099journal_error:
2107 ext4_std_error(dir->i_sb, err); 2100 ext4_std_error(dir->i_sb, err);
2108cleanup: 2101cleanup:
2109 if (bh) 2102 brelse(bh);
2110 brelse(bh);
2111 dx_release(frames); 2103 dx_release(frames);
2112 return err; 2104 return err;
2113} 2105}
@@ -2254,29 +2246,28 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2254{ 2246{
2255 handle_t *handle; 2247 handle_t *handle;
2256 struct inode *inode; 2248 struct inode *inode;
2257 int err, retries = 0; 2249 int err, credits, retries = 0;
2258 2250
2259 dquot_initialize(dir); 2251 dquot_initialize(dir);
2260 2252
2253 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2254 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2255 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2261retry: 2256retry:
2262 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2263 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2258 NULL, EXT4_HT_DIR, credits);
2264 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2259 handle = ext4_journal_current_handle();
2265 if (IS_ERR(handle))
2266 return PTR_ERR(handle);
2267
2268 if (IS_DIRSYNC(dir))
2269 ext4_handle_sync(handle);
2270
2271 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2272 err = PTR_ERR(inode); 2260 err = PTR_ERR(inode);
2273 if (!IS_ERR(inode)) { 2261 if (!IS_ERR(inode)) {
2274 inode->i_op = &ext4_file_inode_operations; 2262 inode->i_op = &ext4_file_inode_operations;
2275 inode->i_fop = &ext4_file_operations; 2263 inode->i_fop = &ext4_file_operations;
2276 ext4_set_aops(inode); 2264 ext4_set_aops(inode);
2277 err = ext4_add_nondir(handle, dentry, inode); 2265 err = ext4_add_nondir(handle, dentry, inode);
2266 if (!err && IS_DIRSYNC(dir))
2267 ext4_handle_sync(handle);
2278 } 2268 }
2279 ext4_journal_stop(handle); 2269 if (handle)
2270 ext4_journal_stop(handle);
2280 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2271 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2281 goto retry; 2272 goto retry;
2282 return err; 2273 return err;
@@ -2287,31 +2278,30 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
2287{ 2278{
2288 handle_t *handle; 2279 handle_t *handle;
2289 struct inode *inode; 2280 struct inode *inode;
2290 int err, retries = 0; 2281 int err, credits, retries = 0;
2291 2282
2292 if (!new_valid_dev(rdev)) 2283 if (!new_valid_dev(rdev))
2293 return -EINVAL; 2284 return -EINVAL;
2294 2285
2295 dquot_initialize(dir); 2286 dquot_initialize(dir);
2296 2287
2288 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2289 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2290 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2297retry: 2291retry:
2298 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2292 inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2299 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2293 NULL, EXT4_HT_DIR, credits);
2300 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2294 handle = ext4_journal_current_handle();
2301 if (IS_ERR(handle))
2302 return PTR_ERR(handle);
2303
2304 if (IS_DIRSYNC(dir))
2305 ext4_handle_sync(handle);
2306
2307 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2308 err = PTR_ERR(inode); 2295 err = PTR_ERR(inode);
2309 if (!IS_ERR(inode)) { 2296 if (!IS_ERR(inode)) {
2310 init_special_inode(inode, inode->i_mode, rdev); 2297 init_special_inode(inode, inode->i_mode, rdev);
2311 inode->i_op = &ext4_special_inode_operations; 2298 inode->i_op = &ext4_special_inode_operations;
2312 err = ext4_add_nondir(handle, dentry, inode); 2299 err = ext4_add_nondir(handle, dentry, inode);
2300 if (!err && IS_DIRSYNC(dir))
2301 ext4_handle_sync(handle);
2313 } 2302 }
2314 ext4_journal_stop(handle); 2303 if (handle)
2304 ext4_journal_stop(handle);
2315 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2305 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2316 goto retry; 2306 goto retry;
2317 return err; 2307 return err;
@@ -2351,6 +2341,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2351 struct buffer_head *dir_block = NULL; 2341 struct buffer_head *dir_block = NULL;
2352 struct ext4_dir_entry_2 *de; 2342 struct ext4_dir_entry_2 *de;
2353 struct ext4_dir_entry_tail *t; 2343 struct ext4_dir_entry_tail *t;
2344 ext4_lblk_t block = 0;
2354 unsigned int blocksize = dir->i_sb->s_blocksize; 2345 unsigned int blocksize = dir->i_sb->s_blocksize;
2355 int csum_size = 0; 2346 int csum_size = 0;
2356 int err; 2347 int err;
@@ -2367,17 +2358,10 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2367 goto out; 2358 goto out;
2368 } 2359 }
2369 2360
2370 inode->i_size = EXT4_I(inode)->i_disksize = blocksize; 2361 inode->i_size = 0;
2371 dir_block = ext4_bread(handle, inode, 0, 1, &err); 2362 dir_block = ext4_append(handle, inode, &block);
2372 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) { 2363 if (IS_ERR(dir_block))
2373 if (!err) { 2364 return PTR_ERR(dir_block);
2374 err = -EIO;
2375 ext4_error(inode->i_sb,
2376 "Directory hole detected on inode %lu\n",
2377 inode->i_ino);
2378 }
2379 goto out;
2380 }
2381 BUFFER_TRACE(dir_block, "get_write_access"); 2365 BUFFER_TRACE(dir_block, "get_write_access");
2382 err = ext4_journal_get_write_access(handle, dir_block); 2366 err = ext4_journal_get_write_access(handle, dir_block);
2383 if (err) 2367 if (err)
@@ -2404,25 +2388,21 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2404{ 2388{
2405 handle_t *handle; 2389 handle_t *handle;
2406 struct inode *inode; 2390 struct inode *inode;
2407 int err, retries = 0; 2391 int err, credits, retries = 0;
2408 2392
2409 if (EXT4_DIR_LINK_MAX(dir)) 2393 if (EXT4_DIR_LINK_MAX(dir))
2410 return -EMLINK; 2394 return -EMLINK;
2411 2395
2412 dquot_initialize(dir); 2396 dquot_initialize(dir);
2413 2397
2398 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2399 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2400 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2414retry: 2401retry:
2415 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2402 inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
2416 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2403 &dentry->d_name,
2417 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2404 0, NULL, EXT4_HT_DIR, credits);
2418 if (IS_ERR(handle)) 2405 handle = ext4_journal_current_handle();
2419 return PTR_ERR(handle);
2420
2421 if (IS_DIRSYNC(dir))
2422 ext4_handle_sync(handle);
2423
2424 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
2425 &dentry->d_name, 0, NULL);
2426 err = PTR_ERR(inode); 2406 err = PTR_ERR(inode);
2427 if (IS_ERR(inode)) 2407 if (IS_ERR(inode))
2428 goto out_stop; 2408 goto out_stop;
@@ -2450,8 +2430,12 @@ out_clear_inode:
2450 goto out_clear_inode; 2430 goto out_clear_inode;
2451 unlock_new_inode(inode); 2431 unlock_new_inode(inode);
2452 d_instantiate(dentry, inode); 2432 d_instantiate(dentry, inode);
2433 if (IS_DIRSYNC(dir))
2434 ext4_handle_sync(handle);
2435
2453out_stop: 2436out_stop:
2454 ext4_journal_stop(handle); 2437 if (handle)
2438 ext4_journal_stop(handle);
2455 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2439 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2456 goto retry; 2440 goto retry;
2457 return err; 2441 return err;
@@ -2477,25 +2461,14 @@ static int empty_dir(struct inode *inode)
2477 } 2461 }
2478 2462
2479 sb = inode->i_sb; 2463 sb = inode->i_sb;
2480 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 2464 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
2481 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { 2465 EXT4_ERROR_INODE(inode, "invalid size");
2482 if (err)
2483 EXT4_ERROR_INODE(inode,
2484 "error %d reading directory lblock 0", err);
2485 else
2486 ext4_warning(inode->i_sb,
2487 "bad directory (dir #%lu) - no data block",
2488 inode->i_ino);
2489 return 1; 2466 return 1;
2490 } 2467 }
2491 if (!buffer_verified(bh) && 2468 bh = ext4_read_dirblock(inode, 0, EITHER);
2492 !ext4_dirent_csum_verify(inode, 2469 if (IS_ERR(bh))
2493 (struct ext4_dir_entry *)bh->b_data)) { 2470 return 1;
2494 EXT4_ERROR_INODE(inode, "checksum error reading directory " 2471
2495 "lblock 0");
2496 return -EIO;
2497 }
2498 set_buffer_verified(bh);
2499 de = (struct ext4_dir_entry_2 *) bh->b_data; 2472 de = (struct ext4_dir_entry_2 *) bh->b_data;
2500 de1 = ext4_next_entry(de, sb->s_blocksize); 2473 de1 = ext4_next_entry(de, sb->s_blocksize);
2501 if (le32_to_cpu(de->inode) != inode->i_ino || 2474 if (le32_to_cpu(de->inode) != inode->i_ino ||
@@ -2518,28 +2491,9 @@ static int empty_dir(struct inode *inode)
2518 err = 0; 2491 err = 0;
2519 brelse(bh); 2492 brelse(bh);
2520 lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); 2493 lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
2521 bh = ext4_bread(NULL, inode, lblock, 0, &err); 2494 bh = ext4_read_dirblock(inode, lblock, EITHER);
2522 if (!bh) { 2495 if (IS_ERR(bh))
2523 if (err) 2496 return 1;
2524 EXT4_ERROR_INODE(inode,
2525 "error %d reading directory "
2526 "lblock %u", err, lblock);
2527 else
2528 ext4_warning(inode->i_sb,
2529 "bad directory (dir #%lu) - no data block",
2530 inode->i_ino);
2531
2532 offset += sb->s_blocksize;
2533 continue;
2534 }
2535 if (!buffer_verified(bh) &&
2536 !ext4_dirent_csum_verify(inode,
2537 (struct ext4_dir_entry *)bh->b_data)) {
2538 EXT4_ERROR_INODE(inode, "checksum error "
2539 "reading directory lblock 0");
2540 return -EIO;
2541 }
2542 set_buffer_verified(bh);
2543 de = (struct ext4_dir_entry_2 *) bh->b_data; 2497 de = (struct ext4_dir_entry_2 *) bh->b_data;
2544 } 2498 }
2545 if (ext4_check_dir_entry(inode, NULL, de, bh, 2499 if (ext4_check_dir_entry(inode, NULL, de, bh,
@@ -2718,25 +2672,18 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2718 struct inode *inode; 2672 struct inode *inode;
2719 struct buffer_head *bh; 2673 struct buffer_head *bh;
2720 struct ext4_dir_entry_2 *de; 2674 struct ext4_dir_entry_2 *de;
2721 handle_t *handle; 2675 handle_t *handle = NULL;
2722 2676
2723 /* Initialize quotas before so that eventual writes go in 2677 /* Initialize quotas before so that eventual writes go in
2724 * separate transaction */ 2678 * separate transaction */
2725 dquot_initialize(dir); 2679 dquot_initialize(dir);
2726 dquot_initialize(dentry->d_inode); 2680 dquot_initialize(dentry->d_inode);
2727 2681
2728 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2729 if (IS_ERR(handle))
2730 return PTR_ERR(handle);
2731
2732 retval = -ENOENT; 2682 retval = -ENOENT;
2733 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2683 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2734 if (!bh) 2684 if (!bh)
2735 goto end_rmdir; 2685 goto end_rmdir;
2736 2686
2737 if (IS_DIRSYNC(dir))
2738 ext4_handle_sync(handle);
2739
2740 inode = dentry->d_inode; 2687 inode = dentry->d_inode;
2741 2688
2742 retval = -EIO; 2689 retval = -EIO;
@@ -2747,6 +2694,17 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2747 if (!empty_dir(inode)) 2694 if (!empty_dir(inode))
2748 goto end_rmdir; 2695 goto end_rmdir;
2749 2696
2697 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2698 EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2699 if (IS_ERR(handle)) {
2700 retval = PTR_ERR(handle);
2701 handle = NULL;
2702 goto end_rmdir;
2703 }
2704
2705 if (IS_DIRSYNC(dir))
2706 ext4_handle_sync(handle);
2707
2750 retval = ext4_delete_entry(handle, dir, de, bh); 2708 retval = ext4_delete_entry(handle, dir, de, bh);
2751 if (retval) 2709 if (retval)
2752 goto end_rmdir; 2710 goto end_rmdir;
@@ -2768,8 +2726,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2768 ext4_mark_inode_dirty(handle, dir); 2726 ext4_mark_inode_dirty(handle, dir);
2769 2727
2770end_rmdir: 2728end_rmdir:
2771 ext4_journal_stop(handle);
2772 brelse(bh); 2729 brelse(bh);
2730 if (handle)
2731 ext4_journal_stop(handle);
2773 return retval; 2732 return retval;
2774} 2733}
2775 2734
@@ -2779,7 +2738,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2779 struct inode *inode; 2738 struct inode *inode;
2780 struct buffer_head *bh; 2739 struct buffer_head *bh;
2781 struct ext4_dir_entry_2 *de; 2740 struct ext4_dir_entry_2 *de;
2782 handle_t *handle; 2741 handle_t *handle = NULL;
2783 2742
2784 trace_ext4_unlink_enter(dir, dentry); 2743 trace_ext4_unlink_enter(dir, dentry);
2785 /* Initialize quotas before so that eventual writes go 2744 /* Initialize quotas before so that eventual writes go
@@ -2787,13 +2746,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2787 dquot_initialize(dir); 2746 dquot_initialize(dir);
2788 dquot_initialize(dentry->d_inode); 2747 dquot_initialize(dentry->d_inode);
2789 2748
2790 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2791 if (IS_ERR(handle))
2792 return PTR_ERR(handle);
2793
2794 if (IS_DIRSYNC(dir))
2795 ext4_handle_sync(handle);
2796
2797 retval = -ENOENT; 2749 retval = -ENOENT;
2798 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2750 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2799 if (!bh) 2751 if (!bh)
@@ -2805,6 +2757,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2805 if (le32_to_cpu(de->inode) != inode->i_ino) 2757 if (le32_to_cpu(de->inode) != inode->i_ino)
2806 goto end_unlink; 2758 goto end_unlink;
2807 2759
2760 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2761 EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2762 if (IS_ERR(handle)) {
2763 retval = PTR_ERR(handle);
2764 handle = NULL;
2765 goto end_unlink;
2766 }
2767
2768 if (IS_DIRSYNC(dir))
2769 ext4_handle_sync(handle);
2770
2808 if (!inode->i_nlink) { 2771 if (!inode->i_nlink) {
2809 ext4_warning(inode->i_sb, 2772 ext4_warning(inode->i_sb,
2810 "Deleting nonexistent file (%lu), %d", 2773 "Deleting nonexistent file (%lu), %d",
@@ -2825,8 +2788,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2825 retval = 0; 2788 retval = 0;
2826 2789
2827end_unlink: 2790end_unlink:
2828 ext4_journal_stop(handle);
2829 brelse(bh); 2791 brelse(bh);
2792 if (handle)
2793 ext4_journal_stop(handle);
2830 trace_ext4_unlink_exit(dentry, retval); 2794 trace_ext4_unlink_exit(dentry, retval);
2831 return retval; 2795 return retval;
2832} 2796}
@@ -2866,15 +2830,10 @@ static int ext4_symlink(struct inode *dir,
2866 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); 2830 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2867 } 2831 }
2868retry: 2832retry:
2869 handle = ext4_journal_start(dir, credits); 2833 inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
2870 if (IS_ERR(handle)) 2834 &dentry->d_name, 0, NULL,
2871 return PTR_ERR(handle); 2835 EXT4_HT_DIR, credits);
2872 2836 handle = ext4_journal_current_handle();
2873 if (IS_DIRSYNC(dir))
2874 ext4_handle_sync(handle);
2875
2876 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2877 &dentry->d_name, 0, NULL);
2878 err = PTR_ERR(inode); 2837 err = PTR_ERR(inode);
2879 if (IS_ERR(inode)) 2838 if (IS_ERR(inode))
2880 goto out_stop; 2839 goto out_stop;
@@ -2904,7 +2863,7 @@ retry:
2904 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS 2863 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
2905 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified 2864 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2906 */ 2865 */
2907 handle = ext4_journal_start(dir, 2866 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2908 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2867 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2909 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); 2868 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2910 if (IS_ERR(handle)) { 2869 if (IS_ERR(handle)) {
@@ -2927,8 +2886,12 @@ retry:
2927 } 2886 }
2928 EXT4_I(inode)->i_disksize = inode->i_size; 2887 EXT4_I(inode)->i_disksize = inode->i_size;
2929 err = ext4_add_nondir(handle, dentry, inode); 2888 err = ext4_add_nondir(handle, dentry, inode);
2889 if (!err && IS_DIRSYNC(dir))
2890 ext4_handle_sync(handle);
2891
2930out_stop: 2892out_stop:
2931 ext4_journal_stop(handle); 2893 if (handle)
2894 ext4_journal_stop(handle);
2932 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2895 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2933 goto retry; 2896 goto retry;
2934 return err; 2897 return err;
@@ -2951,8 +2914,9 @@ static int ext4_link(struct dentry *old_dentry,
2951 dquot_initialize(dir); 2914 dquot_initialize(dir);
2952 2915
2953retry: 2916retry:
2954 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2917 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2955 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2918 (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2919 EXT4_INDEX_EXTRA_TRANS_BLOCKS));
2956 if (IS_ERR(handle)) 2920 if (IS_ERR(handle))
2957 return PTR_ERR(handle); 2921 return PTR_ERR(handle);
2958 2922
@@ -2992,13 +2956,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
2992 struct buffer_head *bh; 2956 struct buffer_head *bh;
2993 2957
2994 if (!ext4_has_inline_data(inode)) { 2958 if (!ext4_has_inline_data(inode)) {
2995 if (!(bh = ext4_bread(handle, inode, 0, 0, retval))) { 2959 bh = ext4_read_dirblock(inode, 0, EITHER);
2996 if (!*retval) { 2960 if (IS_ERR(bh)) {
2997 *retval = -EIO; 2961 *retval = PTR_ERR(bh);
2998 ext4_error(inode->i_sb,
2999 "Directory hole detected on inode %lu\n",
3000 inode->i_ino);
3001 }
3002 return NULL; 2962 return NULL;
3003 } 2963 }
3004 *parent_de = ext4_next_entry( 2964 *parent_de = ext4_next_entry(
@@ -3035,9 +2995,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3035 * in separate transaction */ 2995 * in separate transaction */
3036 if (new_dentry->d_inode) 2996 if (new_dentry->d_inode)
3037 dquot_initialize(new_dentry->d_inode); 2997 dquot_initialize(new_dentry->d_inode);
3038 handle = ext4_journal_start(old_dir, 2 * 2998 handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3039 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2999 (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3040 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 3000 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3041 if (IS_ERR(handle)) 3001 if (IS_ERR(handle))
3042 return PTR_ERR(handle); 3002 return PTR_ERR(handle);
3043 3003
@@ -3077,11 +3037,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3077 &inlined); 3037 &inlined);
3078 if (!dir_bh) 3038 if (!dir_bh)
3079 goto end_rename; 3039 goto end_rename;
3080 if (!inlined && !buffer_verified(dir_bh) &&
3081 !ext4_dirent_csum_verify(old_inode,
3082 (struct ext4_dir_entry *)dir_bh->b_data))
3083 goto end_rename;
3084 set_buffer_verified(dir_bh);
3085 if (le32_to_cpu(parent_de->inode) != old_dir->i_ino) 3040 if (le32_to_cpu(parent_de->inode) != old_dir->i_ino)
3086 goto end_rename; 3041 goto end_rename;
3087 retval = -EMLINK; 3042 retval = -EMLINK;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 0016fbca2a40..809b31003ecc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -23,6 +23,7 @@
23#include <linux/workqueue.h> 23#include <linux/workqueue.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/mm.h>
26 27
27#include "ext4_jbd2.h" 28#include "ext4_jbd2.h"
28#include "xattr.h" 29#include "xattr.h"
@@ -73,8 +74,6 @@ void ext4_free_io_end(ext4_io_end_t *io)
73 BUG_ON(!list_empty(&io->list)); 74 BUG_ON(!list_empty(&io->list));
74 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 75 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
75 76
76 if (io->page)
77 put_page(io->page);
78 for (i = 0; i < io->num_io_pages; i++) 77 for (i = 0; i < io->num_io_pages; i++)
79 put_io_page(io->pages[i]); 78 put_io_page(io->pages[i]);
80 io->num_io_pages = 0; 79 io->num_io_pages = 0;
@@ -103,14 +102,13 @@ static int ext4_end_io(ext4_io_end_t *io)
103 "(inode %lu, offset %llu, size %zd, error %d)", 102 "(inode %lu, offset %llu, size %zd, error %d)",
104 inode->i_ino, offset, size, ret); 103 inode->i_ino, offset, size, ret);
105 } 104 }
106 if (io->iocb)
107 aio_complete(io->iocb, io->result, 0);
108
109 if (io->flag & EXT4_IO_END_DIRECT)
110 inode_dio_done(inode);
111 /* Wake up anyone waiting on unwritten extent conversion */ 105 /* Wake up anyone waiting on unwritten extent conversion */
112 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) 106 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
113 wake_up_all(ext4_ioend_wq(inode)); 107 wake_up_all(ext4_ioend_wq(inode));
108 if (io->flag & EXT4_IO_END_DIRECT)
109 inode_dio_done(inode);
110 if (io->iocb)
111 aio_complete(io->iocb, io->result, 0);
114 return ret; 112 return ret;
115} 113}
116 114
@@ -119,7 +117,6 @@ static void dump_completed_IO(struct inode *inode)
119#ifdef EXT4FS_DEBUG 117#ifdef EXT4FS_DEBUG
120 struct list_head *cur, *before, *after; 118 struct list_head *cur, *before, *after;
121 ext4_io_end_t *io, *io0, *io1; 119 ext4_io_end_t *io, *io0, *io1;
122 unsigned long flags;
123 120
124 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { 121 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
125 ext4_debug("inode %lu completed_io list is empty\n", 122 ext4_debug("inode %lu completed_io list is empty\n",
@@ -152,26 +149,20 @@ void ext4_add_complete_io(ext4_io_end_t *io_end)
152 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 149 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
153 150
154 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 151 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
155 if (list_empty(&ei->i_completed_io_list)) { 152 if (list_empty(&ei->i_completed_io_list))
156 io_end->flag |= EXT4_IO_END_QUEUED; 153 queue_work(wq, &ei->i_unwritten_work);
157 queue_work(wq, &io_end->work);
158 }
159 list_add_tail(&io_end->list, &ei->i_completed_io_list); 154 list_add_tail(&io_end->list, &ei->i_completed_io_list);
160 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 155 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
161} 156}
162 157
163static int ext4_do_flush_completed_IO(struct inode *inode, 158static int ext4_do_flush_completed_IO(struct inode *inode)
164 ext4_io_end_t *work_io)
165{ 159{
166 ext4_io_end_t *io; 160 ext4_io_end_t *io;
167 struct list_head unwritten, complete, to_free; 161 struct list_head unwritten;
168 unsigned long flags; 162 unsigned long flags;
169 struct ext4_inode_info *ei = EXT4_I(inode); 163 struct ext4_inode_info *ei = EXT4_I(inode);
170 int err, ret = 0; 164 int err, ret = 0;
171 165
172 INIT_LIST_HEAD(&complete);
173 INIT_LIST_HEAD(&to_free);
174
175 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 166 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
176 dump_completed_IO(inode); 167 dump_completed_IO(inode);
177 list_replace_init(&ei->i_completed_io_list, &unwritten); 168 list_replace_init(&ei->i_completed_io_list, &unwritten);
@@ -185,32 +176,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
185 err = ext4_end_io(io); 176 err = ext4_end_io(io);
186 if (unlikely(!ret && err)) 177 if (unlikely(!ret && err))
187 ret = err; 178 ret = err;
188
189 list_add_tail(&io->list, &complete);
190 }
191 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
192 while (!list_empty(&complete)) {
193 io = list_entry(complete.next, ext4_io_end_t, list);
194 io->flag &= ~EXT4_IO_END_UNWRITTEN; 179 io->flag &= ~EXT4_IO_END_UNWRITTEN;
195 /* end_io context can not be destroyed now because it still
196 * used by queued worker. Worker thread will destroy it later */
197 if (io->flag & EXT4_IO_END_QUEUED)
198 list_del_init(&io->list);
199 else
200 list_move(&io->list, &to_free);
201 }
202 /* If we are called from worker context, it is time to clear queued
203 * flag, and destroy it's end_io if it was converted already */
204 if (work_io) {
205 work_io->flag &= ~EXT4_IO_END_QUEUED;
206 if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
207 list_add_tail(&work_io->list, &to_free);
208 }
209 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
210
211 while (!list_empty(&to_free)) {
212 io = list_entry(to_free.next, ext4_io_end_t, list);
213 list_del_init(&io->list);
214 ext4_free_io_end(io); 180 ext4_free_io_end(io);
215 } 181 }
216 return ret; 182 return ret;
@@ -219,10 +185,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
219/* 185/*
220 * work on completed aio dio IO, to convert unwritten extents to extents 186 * work on completed aio dio IO, to convert unwritten extents to extents
221 */ 187 */
222static void ext4_end_io_work(struct work_struct *work) 188void ext4_end_io_work(struct work_struct *work)
223{ 189{
224 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 190 struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
225 ext4_do_flush_completed_IO(io->inode, io); 191 i_unwritten_work);
192 ext4_do_flush_completed_IO(&ei->vfs_inode);
226} 193}
227 194
228int ext4_flush_unwritten_io(struct inode *inode) 195int ext4_flush_unwritten_io(struct inode *inode)
@@ -230,7 +197,7 @@ int ext4_flush_unwritten_io(struct inode *inode)
230 int ret; 197 int ret;
231 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && 198 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
232 !(inode->i_state & I_FREEING)); 199 !(inode->i_state & I_FREEING));
233 ret = ext4_do_flush_completed_IO(inode, NULL); 200 ret = ext4_do_flush_completed_IO(inode);
234 ext4_unwritten_wait(inode); 201 ext4_unwritten_wait(inode);
235 return ret; 202 return ret;
236} 203}
@@ -241,7 +208,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
241 if (io) { 208 if (io) {
242 atomic_inc(&EXT4_I(inode)->i_ioend_count); 209 atomic_inc(&EXT4_I(inode)->i_ioend_count);
243 io->inode = inode; 210 io->inode = inode;
244 INIT_WORK(&io->work, ext4_end_io_work);
245 INIT_LIST_HEAD(&io->list); 211 INIT_LIST_HEAD(&io->list);
246 } 212 }
247 return io; 213 return io;
@@ -382,14 +348,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
382 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); 348 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
383 } 349 }
384 350
385 if (!buffer_mapped(bh) || buffer_delay(bh)) {
386 if (!buffer_mapped(bh))
387 clear_buffer_dirty(bh);
388 if (io->io_bio)
389 ext4_io_submit(io);
390 return 0;
391 }
392
393 if (io->io_bio && bh->b_blocknr != io->io_next_block) { 351 if (io->io_bio && bh->b_blocknr != io->io_next_block) {
394submit_and_retry: 352submit_and_retry:
395 ext4_io_submit(io); 353 ext4_io_submit(io);
@@ -436,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
436 394
437 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); 395 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
438 if (!io_page) { 396 if (!io_page) {
439 set_page_dirty(page); 397 redirty_page_for_writepage(wbc, page);
440 unlock_page(page); 398 unlock_page(page);
441 return -ENOMEM; 399 return -ENOMEM;
442 } 400 }
@@ -468,7 +426,15 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
468 set_buffer_uptodate(bh); 426 set_buffer_uptodate(bh);
469 continue; 427 continue;
470 } 428 }
471 clear_buffer_dirty(bh); 429 if (!buffer_dirty(bh) || buffer_delay(bh) ||
430 !buffer_mapped(bh) || buffer_unwritten(bh)) {
431 /* A hole? We can safely clear the dirty bit */
432 if (!buffer_mapped(bh))
433 clear_buffer_dirty(bh);
434 if (io->io_bio)
435 ext4_io_submit(io);
436 continue;
437 }
472 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 438 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
473 if (ret) { 439 if (ret) {
474 /* 440 /*
@@ -476,9 +442,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
476 * we can do but mark the page as dirty, and 442 * we can do but mark the page as dirty, and
477 * better luck next time. 443 * better luck next time.
478 */ 444 */
479 set_page_dirty(page); 445 redirty_page_for_writepage(wbc, page);
480 break; 446 break;
481 } 447 }
448 clear_buffer_dirty(bh);
482 } 449 }
483 unlock_page(page); 450 unlock_page(page);
484 /* 451 /*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index d99387b89edd..c7f4d7584669 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -333,8 +333,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
333 int err; 333 int err;
334 334
335 bh = sb_getblk(sb, blk); 335 bh = sb_getblk(sb, blk);
336 if (!bh) 336 if (unlikely(!bh))
337 return ERR_PTR(-EIO); 337 return ERR_PTR(-ENOMEM);
338 if ((err = ext4_journal_get_write_access(handle, bh))) { 338 if ((err = ext4_journal_get_write_access(handle, bh))) {
339 brelse(bh); 339 brelse(bh);
340 bh = ERR_PTR(err); 340 bh = ERR_PTR(err);
@@ -410,8 +410,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
410 return err; 410 return err;
411 411
412 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); 412 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
413 if (!bh) 413 if (unlikely(!bh))
414 return -EIO; 414 return -ENOMEM;
415 415
416 err = ext4_journal_get_write_access(handle, bh); 416 err = ext4_journal_get_write_access(handle, bh);
417 if (err) 417 if (err)
@@ -466,7 +466,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); 466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
467 467
468 /* This transaction may be extended/restarted along the way */ 468 /* This transaction may be extended/restarted along the way */
469 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 469 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
470 if (IS_ERR(handle)) 470 if (IS_ERR(handle))
471 return PTR_ERR(handle); 471 return PTR_ERR(handle);
472 472
@@ -500,8 +500,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
500 goto out; 500 goto out;
501 501
502 gdb = sb_getblk(sb, block); 502 gdb = sb_getblk(sb, block);
503 if (!gdb) { 503 if (unlikely(!gdb)) {
504 err = -EIO; 504 err = -ENOMEM;
505 goto out; 505 goto out;
506 } 506 }
507 507
@@ -1031,7 +1031,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1031 handle_t *handle; 1031 handle_t *handle;
1032 int err = 0, err2; 1032 int err = 0, err2;
1033 1033
1034 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 1034 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
1035 if (IS_ERR(handle)) { 1035 if (IS_ERR(handle)) {
1036 group = 1; 1036 group = 1;
1037 err = PTR_ERR(handle); 1037 err = PTR_ERR(handle);
@@ -1064,8 +1064,8 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1064 ext4_bg_has_super(sb, group)); 1064 ext4_bg_has_super(sb, group));
1065 1065
1066 bh = sb_getblk(sb, backup_block); 1066 bh = sb_getblk(sb, backup_block);
1067 if (!bh) { 1067 if (unlikely(!bh)) {
1068 err = -EIO; 1068 err = -ENOMEM;
1069 break; 1069 break;
1070 } 1070 }
1071 ext4_debug("update metadata backup %llu(+%llu)\n", 1071 ext4_debug("update metadata backup %llu(+%llu)\n",
@@ -1168,7 +1168,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1168static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) 1168static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
1169{ 1169{
1170 struct buffer_head *bh = sb_getblk(sb, block); 1170 struct buffer_head *bh = sb_getblk(sb, block);
1171 if (!bh) 1171 if (unlikely(!bh))
1172 return NULL; 1172 return NULL;
1173 if (!bh_uptodate_or_lock(bh)) { 1173 if (!bh_uptodate_or_lock(bh)) {
1174 if (bh_submit_read(bh) < 0) { 1174 if (bh_submit_read(bh) < 0) {
@@ -1412,7 +1412,7 @@ static int ext4_flex_group_add(struct super_block *sb,
1412 * modify each of the reserved GDT dindirect blocks. 1412 * modify each of the reserved GDT dindirect blocks.
1413 */ 1413 */
1414 credit = flex_gd->count * 4 + reserved_gdb; 1414 credit = flex_gd->count * 4 + reserved_gdb;
1415 handle = ext4_journal_start_sb(sb, credit); 1415 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
1416 if (IS_ERR(handle)) { 1416 if (IS_ERR(handle)) {
1417 err = PTR_ERR(handle); 1417 err = PTR_ERR(handle);
1418 goto exit; 1418 goto exit;
@@ -1506,10 +1506,12 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
1506 group_data[i].blocks_count = blocks_per_group; 1506 group_data[i].blocks_count = blocks_per_group;
1507 overhead = ext4_group_overhead_blocks(sb, group + i); 1507 overhead = ext4_group_overhead_blocks(sb, group + i);
1508 group_data[i].free_blocks_count = blocks_per_group - overhead; 1508 group_data[i].free_blocks_count = blocks_per_group - overhead;
1509 if (ext4_has_group_desc_csum(sb)) 1509 if (ext4_has_group_desc_csum(sb)) {
1510 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | 1510 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
1511 EXT4_BG_INODE_UNINIT; 1511 EXT4_BG_INODE_UNINIT;
1512 else 1512 if (!test_opt(sb, INIT_INODE_TABLE))
1513 flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED;
1514 } else
1513 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; 1515 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
1514 } 1516 }
1515 1517
@@ -1594,7 +1596,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
1594 1596
1595 err = ext4_alloc_flex_bg_array(sb, input->group + 1); 1597 err = ext4_alloc_flex_bg_array(sb, input->group + 1);
1596 if (err) 1598 if (err)
1597 return err; 1599 goto out;
1598 1600
1599 err = ext4_mb_alloc_groupinfo(sb, input->group + 1); 1601 err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
1600 if (err) 1602 if (err)
@@ -1622,7 +1624,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
1622 /* We will update the superblock, one block bitmap, and 1624 /* We will update the superblock, one block bitmap, and
1623 * one group descriptor via ext4_group_add_blocks(). 1625 * one group descriptor via ext4_group_add_blocks().
1624 */ 1626 */
1625 handle = ext4_journal_start_sb(sb, 3); 1627 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3);
1626 if (IS_ERR(handle)) { 1628 if (IS_ERR(handle)) {
1627 err = PTR_ERR(handle); 1629 err = PTR_ERR(handle);
1628 ext4_warning(sb, "error %d on journal start", err); 1630 ext4_warning(sb, "error %d on journal start", err);
@@ -1786,7 +1788,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
1786 credits += 3; /* block bitmap, bg descriptor, resize inode */ 1788 credits += 3; /* block bitmap, bg descriptor, resize inode */
1787 } 1789 }
1788 1790
1789 handle = ext4_journal_start_sb(sb, credits); 1791 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits);
1790 if (IS_ERR(handle)) 1792 if (IS_ERR(handle))
1791 return PTR_ERR(handle); 1793 return PTR_ERR(handle);
1792 1794
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3d4fb81bacd5..620cf5615ba2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -69,8 +69,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
69static void ext4_clear_journal_err(struct super_block *sb, 69static void ext4_clear_journal_err(struct super_block *sb,
70 struct ext4_super_block *es); 70 struct ext4_super_block *es);
71static int ext4_sync_fs(struct super_block *sb, int wait); 71static int ext4_sync_fs(struct super_block *sb, int wait);
72static const char *ext4_decode_error(struct super_block *sb, int errno,
73 char nbuf[16]);
74static int ext4_remount(struct super_block *sb, int *flags, char *data); 72static int ext4_remount(struct super_block *sb, int *flags, char *data);
75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 73static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
76static int ext4_unfreeze(struct super_block *sb); 74static int ext4_unfreeze(struct super_block *sb);
@@ -296,107 +294,6 @@ void ext4_itable_unused_set(struct super_block *sb,
296} 294}
297 295
298 296
299/* Just increment the non-pointer handle value */
300static handle_t *ext4_get_nojournal(void)
301{
302 handle_t *handle = current->journal_info;
303 unsigned long ref_cnt = (unsigned long)handle;
304
305 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
306
307 ref_cnt++;
308 handle = (handle_t *)ref_cnt;
309
310 current->journal_info = handle;
311 return handle;
312}
313
314
315/* Decrement the non-pointer handle value */
316static void ext4_put_nojournal(handle_t *handle)
317{
318 unsigned long ref_cnt = (unsigned long)handle;
319
320 BUG_ON(ref_cnt == 0);
321
322 ref_cnt--;
323 handle = (handle_t *)ref_cnt;
324
325 current->journal_info = handle;
326}
327
328/*
329 * Wrappers for jbd2_journal_start/end.
330 */
331handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
332{
333 journal_t *journal;
334
335 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
336 if (sb->s_flags & MS_RDONLY)
337 return ERR_PTR(-EROFS);
338
339 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
340 journal = EXT4_SB(sb)->s_journal;
341 if (!journal)
342 return ext4_get_nojournal();
343 /*
344 * Special case here: if the journal has aborted behind our
345 * backs (eg. EIO in the commit thread), then we still need to
346 * take the FS itself readonly cleanly.
347 */
348 if (is_journal_aborted(journal)) {
349 ext4_abort(sb, "Detected aborted journal");
350 return ERR_PTR(-EROFS);
351 }
352 return jbd2_journal_start(journal, nblocks);
353}
354
355int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
356{
357 struct super_block *sb;
358 int err;
359 int rc;
360
361 if (!ext4_handle_valid(handle)) {
362 ext4_put_nojournal(handle);
363 return 0;
364 }
365 sb = handle->h_transaction->t_journal->j_private;
366 err = handle->h_err;
367 rc = jbd2_journal_stop(handle);
368
369 if (!err)
370 err = rc;
371 if (err)
372 __ext4_std_error(sb, where, line, err);
373 return err;
374}
375
376void ext4_journal_abort_handle(const char *caller, unsigned int line,
377 const char *err_fn, struct buffer_head *bh,
378 handle_t *handle, int err)
379{
380 char nbuf[16];
381 const char *errstr = ext4_decode_error(NULL, err, nbuf);
382
383 BUG_ON(!ext4_handle_valid(handle));
384
385 if (bh)
386 BUFFER_TRACE(bh, "abort");
387
388 if (!handle->h_err)
389 handle->h_err = err;
390
391 if (is_handle_aborted(handle))
392 return;
393
394 printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
395 caller, line, errstr, err_fn);
396
397 jbd2_journal_abort_handle(handle);
398}
399
400static void __save_error_info(struct super_block *sb, const char *func, 297static void __save_error_info(struct super_block *sb, const char *func,
401 unsigned int line) 298 unsigned int line)
402{ 299{
@@ -553,7 +450,7 @@ void ext4_error_file(struct file *file, const char *function,
553 va_list args; 450 va_list args;
554 struct va_format vaf; 451 struct va_format vaf;
555 struct ext4_super_block *es; 452 struct ext4_super_block *es;
556 struct inode *inode = file->f_dentry->d_inode; 453 struct inode *inode = file_inode(file);
557 char pathname[80], *path; 454 char pathname[80], *path;
558 455
559 es = EXT4_SB(inode->i_sb)->s_es; 456 es = EXT4_SB(inode->i_sb)->s_es;
@@ -582,8 +479,8 @@ void ext4_error_file(struct file *file, const char *function,
582 ext4_handle_error(inode->i_sb); 479 ext4_handle_error(inode->i_sb);
583} 480}
584 481
585static const char *ext4_decode_error(struct super_block *sb, int errno, 482const char *ext4_decode_error(struct super_block *sb, int errno,
586 char nbuf[16]) 483 char nbuf[16])
587{ 484{
588 char *errstr = NULL; 485 char *errstr = NULL;
589 486
@@ -858,6 +755,7 @@ static void ext4_put_super(struct super_block *sb)
858 ext4_abort(sb, "Couldn't clean up the journal"); 755 ext4_abort(sb, "Couldn't clean up the journal");
859 } 756 }
860 757
758 ext4_es_unregister_shrinker(sb);
861 del_timer(&sbi->s_err_report); 759 del_timer(&sbi->s_err_report);
862 ext4_release_system_zone(sb); 760 ext4_release_system_zone(sb);
863 ext4_mb_release(sb); 761 ext4_mb_release(sb);
@@ -939,11 +837,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
939 return NULL; 837 return NULL;
940 838
941 ei->vfs_inode.i_version = 1; 839 ei->vfs_inode.i_version = 1;
942 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
943 INIT_LIST_HEAD(&ei->i_prealloc_list); 840 INIT_LIST_HEAD(&ei->i_prealloc_list);
944 spin_lock_init(&ei->i_prealloc_lock); 841 spin_lock_init(&ei->i_prealloc_lock);
945 ext4_es_init_tree(&ei->i_es_tree); 842 ext4_es_init_tree(&ei->i_es_tree);
946 rwlock_init(&ei->i_es_lock); 843 rwlock_init(&ei->i_es_lock);
844 INIT_LIST_HEAD(&ei->i_es_lru);
845 ei->i_es_lru_nr = 0;
947 ei->i_reserved_data_blocks = 0; 846 ei->i_reserved_data_blocks = 0;
948 ei->i_reserved_meta_blocks = 0; 847 ei->i_reserved_meta_blocks = 0;
949 ei->i_allocated_meta_blocks = 0; 848 ei->i_allocated_meta_blocks = 0;
@@ -960,6 +859,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
960 ei->i_datasync_tid = 0; 859 ei->i_datasync_tid = 0;
961 atomic_set(&ei->i_ioend_count, 0); 860 atomic_set(&ei->i_ioend_count, 0);
962 atomic_set(&ei->i_unwritten, 0); 861 atomic_set(&ei->i_unwritten, 0);
862 INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work);
963 863
964 return &ei->vfs_inode; 864 return &ei->vfs_inode;
965} 865}
@@ -1031,6 +931,7 @@ void ext4_clear_inode(struct inode *inode)
1031 dquot_drop(inode); 931 dquot_drop(inode);
1032 ext4_discard_preallocations(inode); 932 ext4_discard_preallocations(inode);
1033 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 933 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
934 ext4_es_lru_del(inode);
1034 if (EXT4_I(inode)->jinode) { 935 if (EXT4_I(inode)->jinode) {
1035 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 936 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1036 EXT4_I(inode)->jinode); 937 EXT4_I(inode)->jinode);
@@ -1280,8 +1181,8 @@ static const match_table_t tokens = {
1280 {Opt_stripe, "stripe=%u"}, 1181 {Opt_stripe, "stripe=%u"},
1281 {Opt_delalloc, "delalloc"}, 1182 {Opt_delalloc, "delalloc"},
1282 {Opt_nodelalloc, "nodelalloc"}, 1183 {Opt_nodelalloc, "nodelalloc"},
1283 {Opt_mblk_io_submit, "mblk_io_submit"}, 1184 {Opt_removed, "mblk_io_submit"},
1284 {Opt_nomblk_io_submit, "nomblk_io_submit"}, 1185 {Opt_removed, "nomblk_io_submit"},
1285 {Opt_block_validity, "block_validity"}, 1186 {Opt_block_validity, "block_validity"},
1286 {Opt_noblock_validity, "noblock_validity"}, 1187 {Opt_noblock_validity, "noblock_validity"},
1287 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1188 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1337,6 +1238,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1337{ 1238{
1338 struct ext4_sb_info *sbi = EXT4_SB(sb); 1239 struct ext4_sb_info *sbi = EXT4_SB(sb);
1339 char *qname; 1240 char *qname;
1241 int ret = -1;
1340 1242
1341 if (sb_any_quota_loaded(sb) && 1243 if (sb_any_quota_loaded(sb) &&
1342 !sbi->s_qf_names[qtype]) { 1244 !sbi->s_qf_names[qtype]) {
@@ -1351,23 +1253,26 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1351 "Not enough memory for storing quotafile name"); 1253 "Not enough memory for storing quotafile name");
1352 return -1; 1254 return -1;
1353 } 1255 }
1354 if (sbi->s_qf_names[qtype] && 1256 if (sbi->s_qf_names[qtype]) {
1355 strcmp(sbi->s_qf_names[qtype], qname)) { 1257 if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1356 ext4_msg(sb, KERN_ERR, 1258 ret = 1;
1357 "%s quota file already specified", QTYPE2NAME(qtype)); 1259 else
1358 kfree(qname); 1260 ext4_msg(sb, KERN_ERR,
1359 return -1; 1261 "%s quota file already specified",
1262 QTYPE2NAME(qtype));
1263 goto errout;
1360 } 1264 }
1361 sbi->s_qf_names[qtype] = qname; 1265 if (strchr(qname, '/')) {
1362 if (strchr(sbi->s_qf_names[qtype], '/')) {
1363 ext4_msg(sb, KERN_ERR, 1266 ext4_msg(sb, KERN_ERR,
1364 "quotafile must be on filesystem root"); 1267 "quotafile must be on filesystem root");
1365 kfree(sbi->s_qf_names[qtype]); 1268 goto errout;
1366 sbi->s_qf_names[qtype] = NULL;
1367 return -1;
1368 } 1269 }
1270 sbi->s_qf_names[qtype] = qname;
1369 set_opt(sb, QUOTA); 1271 set_opt(sb, QUOTA);
1370 return 1; 1272 return 1;
1273errout:
1274 kfree(qname);
1275 return ret;
1371} 1276}
1372 1277
1373static int clear_qf_name(struct super_block *sb, int qtype) 1278static int clear_qf_name(struct super_block *sb, int qtype)
@@ -1381,10 +1286,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1381 " when quota turned on"); 1286 " when quota turned on");
1382 return -1; 1287 return -1;
1383 } 1288 }
1384 /* 1289 kfree(sbi->s_qf_names[qtype]);
1385 * The space will be released later when all options are confirmed
1386 * to be correct
1387 */
1388 sbi->s_qf_names[qtype] = NULL; 1290 sbi->s_qf_names[qtype] = NULL;
1389 return 1; 1291 return 1;
1390} 1292}
@@ -1404,6 +1306,9 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1404#define MOPT_QFMT MOPT_NOSUPPORT 1306#define MOPT_QFMT MOPT_NOSUPPORT
1405#endif 1307#endif
1406#define MOPT_DATAJ 0x0080 1308#define MOPT_DATAJ 0x0080
1309#define MOPT_NO_EXT2 0x0100
1310#define MOPT_NO_EXT3 0x0200
1311#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1407 1312
1408static const struct mount_opts { 1313static const struct mount_opts {
1409 int token; 1314 int token;
@@ -1414,25 +1319,31 @@ static const struct mount_opts {
1414 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, 1319 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1415 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, 1320 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1416 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, 1321 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1417 {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
1418 {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
1419 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, 1322 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1420 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, 1323 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1421 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, 1324 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1422 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, 1325 MOPT_EXT4_ONLY | MOPT_SET},
1326 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1327 MOPT_EXT4_ONLY | MOPT_CLEAR},
1423 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, 1328 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1424 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, 1329 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1425 {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, 1330 {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1426 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, 1331 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1427 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, 1332 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1333 MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
1334 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1335 MOPT_EXT4_ONLY | MOPT_SET},
1428 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | 1336 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1429 EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, 1337 EXT4_MOUNT_JOURNAL_CHECKSUM),
1430 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, 1338 MOPT_EXT4_ONLY | MOPT_SET},
1339 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1431 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, 1340 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1432 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, 1341 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1433 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, 1342 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1434 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, 1343 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1435 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, 1344 MOPT_NO_EXT2 | MOPT_SET},
1345 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1346 MOPT_NO_EXT2 | MOPT_CLEAR},
1436 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, 1347 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1437 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, 1348 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1438 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, 1349 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
@@ -1444,9 +1355,14 @@ static const struct mount_opts {
1444 {Opt_inode_readahead_blks, 0, MOPT_GTE0}, 1355 {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1445 {Opt_init_itable, 0, MOPT_GTE0}, 1356 {Opt_init_itable, 0, MOPT_GTE0},
1446 {Opt_stripe, 0, MOPT_GTE0}, 1357 {Opt_stripe, 0, MOPT_GTE0},
1447 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, 1358 {Opt_resuid, 0, MOPT_GTE0},
1448 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, 1359 {Opt_resgid, 0, MOPT_GTE0},
1449 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, 1360 {Opt_journal_dev, 0, MOPT_GTE0},
1361 {Opt_journal_ioprio, 0, MOPT_GTE0},
1362 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1363 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1364 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1365 MOPT_NO_EXT2 | MOPT_DATAJ},
1450 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1366 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1451 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1367 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1452#ifdef CONFIG_EXT4_FS_POSIX_ACL 1368#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1496,8 +1412,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1496 else if (token == Opt_offgrpjquota) 1412 else if (token == Opt_offgrpjquota)
1497 return clear_qf_name(sb, GRPQUOTA); 1413 return clear_qf_name(sb, GRPQUOTA);
1498#endif 1414#endif
1499 if (args->from && match_int(args, &arg))
1500 return -1;
1501 switch (token) { 1415 switch (token) {
1502 case Opt_noacl: 1416 case Opt_noacl:
1503 case Opt_nouser_xattr: 1417 case Opt_nouser_xattr:
@@ -1506,138 +1420,149 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1506 case Opt_sb: 1420 case Opt_sb:
1507 return 1; /* handled by get_sb_block() */ 1421 return 1; /* handled by get_sb_block() */
1508 case Opt_removed: 1422 case Opt_removed:
1509 ext4_msg(sb, KERN_WARNING, 1423 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1510 "Ignoring removed %s option", opt); 1424 return 1;
1425 case Opt_abort:
1426 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1427 return 1;
1428 case Opt_i_version:
1429 sb->s_flags |= MS_I_VERSION;
1511 return 1; 1430 return 1;
1512 case Opt_resuid: 1431 }
1432
1433 for (m = ext4_mount_opts; m->token != Opt_err; m++)
1434 if (token == m->token)
1435 break;
1436
1437 if (m->token == Opt_err) {
1438 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1439 "or missing value", opt);
1440 return -1;
1441 }
1442
1443 if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1444 ext4_msg(sb, KERN_ERR,
1445 "Mount option \"%s\" incompatible with ext2", opt);
1446 return -1;
1447 }
1448 if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1449 ext4_msg(sb, KERN_ERR,
1450 "Mount option \"%s\" incompatible with ext3", opt);
1451 return -1;
1452 }
1453
1454 if (args->from && match_int(args, &arg))
1455 return -1;
1456 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1457 return -1;
1458 if (m->flags & MOPT_EXPLICIT)
1459 set_opt2(sb, EXPLICIT_DELALLOC);
1460 if (m->flags & MOPT_CLEAR_ERR)
1461 clear_opt(sb, ERRORS_MASK);
1462 if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1463 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1464 "options when quota turned on");
1465 return -1;
1466 }
1467
1468 if (m->flags & MOPT_NOSUPPORT) {
1469 ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1470 } else if (token == Opt_commit) {
1471 if (arg == 0)
1472 arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1473 sbi->s_commit_interval = HZ * arg;
1474 } else if (token == Opt_max_batch_time) {
1475 if (arg == 0)
1476 arg = EXT4_DEF_MAX_BATCH_TIME;
1477 sbi->s_max_batch_time = arg;
1478 } else if (token == Opt_min_batch_time) {
1479 sbi->s_min_batch_time = arg;
1480 } else if (token == Opt_inode_readahead_blks) {
1481 if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1482 ext4_msg(sb, KERN_ERR,
1483 "EXT4-fs: inode_readahead_blks must be "
1484 "0 or a power of 2 smaller than 2^31");
1485 return -1;
1486 }
1487 sbi->s_inode_readahead_blks = arg;
1488 } else if (token == Opt_init_itable) {
1489 set_opt(sb, INIT_INODE_TABLE);
1490 if (!args->from)
1491 arg = EXT4_DEF_LI_WAIT_MULT;
1492 sbi->s_li_wait_mult = arg;
1493 } else if (token == Opt_max_dir_size_kb) {
1494 sbi->s_max_dir_size_kb = arg;
1495 } else if (token == Opt_stripe) {
1496 sbi->s_stripe = arg;
1497 } else if (token == Opt_resuid) {
1513 uid = make_kuid(current_user_ns(), arg); 1498 uid = make_kuid(current_user_ns(), arg);
1514 if (!uid_valid(uid)) { 1499 if (!uid_valid(uid)) {
1515 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); 1500 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1516 return -1; 1501 return -1;
1517 } 1502 }
1518 sbi->s_resuid = uid; 1503 sbi->s_resuid = uid;
1519 return 1; 1504 } else if (token == Opt_resgid) {
1520 case Opt_resgid:
1521 gid = make_kgid(current_user_ns(), arg); 1505 gid = make_kgid(current_user_ns(), arg);
1522 if (!gid_valid(gid)) { 1506 if (!gid_valid(gid)) {
1523 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); 1507 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1524 return -1; 1508 return -1;
1525 } 1509 }
1526 sbi->s_resgid = gid; 1510 sbi->s_resgid = gid;
1527 return 1; 1511 } else if (token == Opt_journal_dev) {
1528 case Opt_abort:
1529 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1530 return 1;
1531 case Opt_i_version:
1532 sb->s_flags |= MS_I_VERSION;
1533 return 1;
1534 case Opt_journal_dev:
1535 if (is_remount) { 1512 if (is_remount) {
1536 ext4_msg(sb, KERN_ERR, 1513 ext4_msg(sb, KERN_ERR,
1537 "Cannot specify journal on remount"); 1514 "Cannot specify journal on remount");
1538 return -1; 1515 return -1;
1539 } 1516 }
1540 *journal_devnum = arg; 1517 *journal_devnum = arg;
1541 return 1; 1518 } else if (token == Opt_journal_ioprio) {
1542 case Opt_journal_ioprio: 1519 if (arg > 7) {
1543 if (arg < 0 || arg > 7) 1520 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1544 return -1; 1521 " (must be 0-7)");
1545 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1546 return 1;
1547 }
1548
1549 for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1550 if (token != m->token)
1551 continue;
1552 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1553 return -1;
1554 if (m->flags & MOPT_EXPLICIT)
1555 set_opt2(sb, EXPLICIT_DELALLOC);
1556 if (m->flags & MOPT_CLEAR_ERR)
1557 clear_opt(sb, ERRORS_MASK);
1558 if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1559 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1560 "options when quota turned on");
1561 return -1; 1522 return -1;
1562 } 1523 }
1563 1524 *journal_ioprio =
1564 if (m->flags & MOPT_NOSUPPORT) { 1525 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1565 ext4_msg(sb, KERN_ERR, "%s option not supported", opt); 1526 } else if (m->flags & MOPT_DATAJ) {
1566 } else if (token == Opt_commit) { 1527 if (is_remount) {
1567 if (arg == 0) 1528 if (!sbi->s_journal)
1568 arg = JBD2_DEFAULT_MAX_COMMIT_AGE; 1529 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1569 sbi->s_commit_interval = HZ * arg; 1530 else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1570 } else if (token == Opt_max_batch_time) {
1571 if (arg == 0)
1572 arg = EXT4_DEF_MAX_BATCH_TIME;
1573 sbi->s_max_batch_time = arg;
1574 } else if (token == Opt_min_batch_time) {
1575 sbi->s_min_batch_time = arg;
1576 } else if (token == Opt_inode_readahead_blks) {
1577 if (arg > (1 << 30))
1578 return -1;
1579 if (arg && !is_power_of_2(arg)) {
1580 ext4_msg(sb, KERN_ERR, 1531 ext4_msg(sb, KERN_ERR,
1581 "EXT4-fs: inode_readahead_blks"
1582 " must be a power of 2");
1583 return -1;
1584 }
1585 sbi->s_inode_readahead_blks = arg;
1586 } else if (token == Opt_init_itable) {
1587 set_opt(sb, INIT_INODE_TABLE);
1588 if (!args->from)
1589 arg = EXT4_DEF_LI_WAIT_MULT;
1590 sbi->s_li_wait_mult = arg;
1591 } else if (token == Opt_max_dir_size_kb) {
1592 sbi->s_max_dir_size_kb = arg;
1593 } else if (token == Opt_stripe) {
1594 sbi->s_stripe = arg;
1595 } else if (m->flags & MOPT_DATAJ) {
1596 if (is_remount) {
1597 if (!sbi->s_journal)
1598 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1599 else if (test_opt(sb, DATA_FLAGS) !=
1600 m->mount_opt) {
1601 ext4_msg(sb, KERN_ERR,
1602 "Cannot change data mode on remount"); 1532 "Cannot change data mode on remount");
1603 return -1;
1604 }
1605 } else {
1606 clear_opt(sb, DATA_FLAGS);
1607 sbi->s_mount_opt |= m->mount_opt;
1608 }
1609#ifdef CONFIG_QUOTA
1610 } else if (m->flags & MOPT_QFMT) {
1611 if (sb_any_quota_loaded(sb) &&
1612 sbi->s_jquota_fmt != m->mount_opt) {
1613 ext4_msg(sb, KERN_ERR, "Cannot "
1614 "change journaled quota options "
1615 "when quota turned on");
1616 return -1; 1533 return -1;
1617 } 1534 }
1618 sbi->s_jquota_fmt = m->mount_opt;
1619#endif
1620 } else { 1535 } else {
1621 if (!args->from) 1536 clear_opt(sb, DATA_FLAGS);
1622 arg = 1; 1537 sbi->s_mount_opt |= m->mount_opt;
1623 if (m->flags & MOPT_CLEAR)
1624 arg = !arg;
1625 else if (unlikely(!(m->flags & MOPT_SET))) {
1626 ext4_msg(sb, KERN_WARNING,
1627 "buggy handling of option %s", opt);
1628 WARN_ON(1);
1629 return -1;
1630 }
1631 if (arg != 0)
1632 sbi->s_mount_opt |= m->mount_opt;
1633 else
1634 sbi->s_mount_opt &= ~m->mount_opt;
1635 } 1538 }
1636 return 1; 1539#ifdef CONFIG_QUOTA
1540 } else if (m->flags & MOPT_QFMT) {
1541 if (sb_any_quota_loaded(sb) &&
1542 sbi->s_jquota_fmt != m->mount_opt) {
1543 ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1544 "quota options when quota turned on");
1545 return -1;
1546 }
1547 sbi->s_jquota_fmt = m->mount_opt;
1548#endif
1549 } else {
1550 if (!args->from)
1551 arg = 1;
1552 if (m->flags & MOPT_CLEAR)
1553 arg = !arg;
1554 else if (unlikely(!(m->flags & MOPT_SET))) {
1555 ext4_msg(sb, KERN_WARNING,
1556 "buggy handling of option %s", opt);
1557 WARN_ON(1);
1558 return -1;
1559 }
1560 if (arg != 0)
1561 sbi->s_mount_opt |= m->mount_opt;
1562 else
1563 sbi->s_mount_opt &= ~m->mount_opt;
1637 } 1564 }
1638 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " 1565 return 1;
1639 "or missing value", opt);
1640 return -1;
1641} 1566}
1642 1567
1643static int parse_options(char *options, struct super_block *sb, 1568static int parse_options(char *options, struct super_block *sb,
@@ -2776,7 +2701,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2776 break; 2701 break;
2777 } 2702 }
2778 2703
2779 if (group == ngroups) 2704 if (group >= ngroups)
2780 ret = 1; 2705 ret = 1;
2781 2706
2782 if (!ret) { 2707 if (!ret) {
@@ -3016,33 +2941,34 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3016 return elr; 2941 return elr;
3017} 2942}
3018 2943
3019static int ext4_register_li_request(struct super_block *sb, 2944int ext4_register_li_request(struct super_block *sb,
3020 ext4_group_t first_not_zeroed) 2945 ext4_group_t first_not_zeroed)
3021{ 2946{
3022 struct ext4_sb_info *sbi = EXT4_SB(sb); 2947 struct ext4_sb_info *sbi = EXT4_SB(sb);
3023 struct ext4_li_request *elr; 2948 struct ext4_li_request *elr = NULL;
3024 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 2949 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3025 int ret = 0; 2950 int ret = 0;
3026 2951
2952 mutex_lock(&ext4_li_mtx);
3027 if (sbi->s_li_request != NULL) { 2953 if (sbi->s_li_request != NULL) {
3028 /* 2954 /*
3029 * Reset timeout so it can be computed again, because 2955 * Reset timeout so it can be computed again, because
3030 * s_li_wait_mult might have changed. 2956 * s_li_wait_mult might have changed.
3031 */ 2957 */
3032 sbi->s_li_request->lr_timeout = 0; 2958 sbi->s_li_request->lr_timeout = 0;
3033 return 0; 2959 goto out;
3034 } 2960 }
3035 2961
3036 if (first_not_zeroed == ngroups || 2962 if (first_not_zeroed == ngroups ||
3037 (sb->s_flags & MS_RDONLY) || 2963 (sb->s_flags & MS_RDONLY) ||
3038 !test_opt(sb, INIT_INODE_TABLE)) 2964 !test_opt(sb, INIT_INODE_TABLE))
3039 return 0; 2965 goto out;
3040 2966
3041 elr = ext4_li_request_new(sb, first_not_zeroed); 2967 elr = ext4_li_request_new(sb, first_not_zeroed);
3042 if (!elr) 2968 if (!elr) {
3043 return -ENOMEM; 2969 ret = -ENOMEM;
3044 2970 goto out;
3045 mutex_lock(&ext4_li_mtx); 2971 }
3046 2972
3047 if (NULL == ext4_li_info) { 2973 if (NULL == ext4_li_info) {
3048 ret = ext4_li_info_new(); 2974 ret = ext4_li_info_new();
@@ -3379,7 +3305,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3379#ifdef CONFIG_EXT4_FS_POSIX_ACL 3305#ifdef CONFIG_EXT4_FS_POSIX_ACL
3380 set_opt(sb, POSIX_ACL); 3306 set_opt(sb, POSIX_ACL);
3381#endif 3307#endif
3382 set_opt(sb, MBLK_IO_SUBMIT);
3383 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3308 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3384 set_opt(sb, JOURNAL_DATA); 3309 set_opt(sb, JOURNAL_DATA);
3385 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3310 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3772,6 +3697,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3772 sbi->s_max_writeback_mb_bump = 128; 3697 sbi->s_max_writeback_mb_bump = 128;
3773 sbi->s_extent_max_zeroout_kb = 32; 3698 sbi->s_extent_max_zeroout_kb = 32;
3774 3699
3700 /* Register extent status tree shrinker */
3701 ext4_es_register_shrinker(sb);
3702
3775 /* 3703 /*
3776 * set up enough so that it can read an inode 3704 * set up enough so that it can read an inode
3777 */ 3705 */
@@ -4008,7 +3936,7 @@ no_journal:
4008 !(sb->s_flags & MS_RDONLY)) { 3936 !(sb->s_flags & MS_RDONLY)) {
4009 err = ext4_enable_quotas(sb); 3937 err = ext4_enable_quotas(sb);
4010 if (err) 3938 if (err)
4011 goto failed_mount7; 3939 goto failed_mount8;
4012 } 3940 }
4013#endif /* CONFIG_QUOTA */ 3941#endif /* CONFIG_QUOTA */
4014 3942
@@ -4035,6 +3963,10 @@ cantfind_ext4:
4035 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3963 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4036 goto failed_mount; 3964 goto failed_mount;
4037 3965
3966#ifdef CONFIG_QUOTA
3967failed_mount8:
3968 kobject_del(&sbi->s_kobj);
3969#endif
4038failed_mount7: 3970failed_mount7:
4039 ext4_unregister_li_request(sb); 3971 ext4_unregister_li_request(sb);
4040failed_mount6: 3972failed_mount6:
@@ -4476,16 +4408,12 @@ static void ext4_clear_journal_err(struct super_block *sb,
4476int ext4_force_commit(struct super_block *sb) 4408int ext4_force_commit(struct super_block *sb)
4477{ 4409{
4478 journal_t *journal; 4410 journal_t *journal;
4479 int ret = 0;
4480 4411
4481 if (sb->s_flags & MS_RDONLY) 4412 if (sb->s_flags & MS_RDONLY)
4482 return 0; 4413 return 0;
4483 4414
4484 journal = EXT4_SB(sb)->s_journal; 4415 journal = EXT4_SB(sb)->s_journal;
4485 if (journal) 4416 return ext4_journal_force_commit(journal);
4486 ret = ext4_journal_force_commit(journal);
4487
4488 return ret;
4489} 4417}
4490 4418
4491static int ext4_sync_fs(struct super_block *sb, int wait) 4419static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -4588,7 +4516,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4588 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4516 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4589 int err = 0; 4517 int err = 0;
4590#ifdef CONFIG_QUOTA 4518#ifdef CONFIG_QUOTA
4591 int i; 4519 int i, j;
4592#endif 4520#endif
4593 char *orig_data = kstrdup(data, GFP_KERNEL); 4521 char *orig_data = kstrdup(data, GFP_KERNEL);
4594 4522
@@ -4604,7 +4532,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4604#ifdef CONFIG_QUOTA 4532#ifdef CONFIG_QUOTA
4605 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4533 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4606 for (i = 0; i < MAXQUOTAS; i++) 4534 for (i = 0; i < MAXQUOTAS; i++)
4607 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 4535 if (sbi->s_qf_names[i]) {
4536 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4537 GFP_KERNEL);
4538 if (!old_opts.s_qf_names[i]) {
4539 for (j = 0; j < i; j++)
4540 kfree(old_opts.s_qf_names[j]);
4541 return -ENOMEM;
4542 }
4543 } else
4544 old_opts.s_qf_names[i] = NULL;
4608#endif 4545#endif
4609 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4546 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4610 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4547 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
@@ -4737,9 +4674,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4737#ifdef CONFIG_QUOTA 4674#ifdef CONFIG_QUOTA
4738 /* Release old quota file names */ 4675 /* Release old quota file names */
4739 for (i = 0; i < MAXQUOTAS; i++) 4676 for (i = 0; i < MAXQUOTAS; i++)
4740 if (old_opts.s_qf_names[i] && 4677 kfree(old_opts.s_qf_names[i]);
4741 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4742 kfree(old_opts.s_qf_names[i]);
4743 if (enable_quota) { 4678 if (enable_quota) {
4744 if (sb_any_quota_suspended(sb)) 4679 if (sb_any_quota_suspended(sb))
4745 dquot_resume(sb, -1); 4680 dquot_resume(sb, -1);
@@ -4768,9 +4703,7 @@ restore_opts:
4768#ifdef CONFIG_QUOTA 4703#ifdef CONFIG_QUOTA
4769 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 4704 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4770 for (i = 0; i < MAXQUOTAS; i++) { 4705 for (i = 0; i < MAXQUOTAS; i++) {
4771 if (sbi->s_qf_names[i] && 4706 kfree(sbi->s_qf_names[i]);
4772 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4773 kfree(sbi->s_qf_names[i]);
4774 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4707 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4775 } 4708 }
4776#endif 4709#endif
@@ -4835,7 +4768,7 @@ static int ext4_write_dquot(struct dquot *dquot)
4835 struct inode *inode; 4768 struct inode *inode;
4836 4769
4837 inode = dquot_to_inode(dquot); 4770 inode = dquot_to_inode(dquot);
4838 handle = ext4_journal_start(inode, 4771 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4839 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 4772 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
4840 if (IS_ERR(handle)) 4773 if (IS_ERR(handle))
4841 return PTR_ERR(handle); 4774 return PTR_ERR(handle);
@@ -4851,7 +4784,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
4851 int ret, err; 4784 int ret, err;
4852 handle_t *handle; 4785 handle_t *handle;
4853 4786
4854 handle = ext4_journal_start(dquot_to_inode(dquot), 4787 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4855 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 4788 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
4856 if (IS_ERR(handle)) 4789 if (IS_ERR(handle))
4857 return PTR_ERR(handle); 4790 return PTR_ERR(handle);
@@ -4867,7 +4800,7 @@ static int ext4_release_dquot(struct dquot *dquot)
4867 int ret, err; 4800 int ret, err;
4868 handle_t *handle; 4801 handle_t *handle;
4869 4802
4870 handle = ext4_journal_start(dquot_to_inode(dquot), 4803 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4871 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 4804 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
4872 if (IS_ERR(handle)) { 4805 if (IS_ERR(handle)) {
4873 /* Release dquot anyway to avoid endless cycle in dqput() */ 4806 /* Release dquot anyway to avoid endless cycle in dqput() */
@@ -4899,7 +4832,7 @@ static int ext4_write_info(struct super_block *sb, int type)
4899 handle_t *handle; 4832 handle_t *handle;
4900 4833
4901 /* Data block + inode block */ 4834 /* Data block + inode block */
4902 handle = ext4_journal_start(sb->s_root->d_inode, 2); 4835 handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
4903 if (IS_ERR(handle)) 4836 if (IS_ERR(handle))
4904 return PTR_ERR(handle); 4837 return PTR_ERR(handle);
4905 ret = dquot_commit_info(sb, type); 4838 ret = dquot_commit_info(sb, type);
@@ -5005,9 +4938,9 @@ static int ext4_enable_quotas(struct super_block *sb)
5005 DQUOT_USAGE_ENABLED); 4938 DQUOT_USAGE_ENABLED);
5006 if (err) { 4939 if (err) {
5007 ext4_warning(sb, 4940 ext4_warning(sb,
5008 "Failed to enable quota (type=%d) " 4941 "Failed to enable quota tracking "
5009 "tracking. Please run e2fsck to fix.", 4942 "(type=%d, err=%d). Please run "
5010 type); 4943 "e2fsck to fix.", type, err);
5011 return err; 4944 return err;
5012 } 4945 }
5013 } 4946 }
@@ -5045,7 +4978,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
5045 4978
5046 /* Update modification times of quota files when userspace can 4979 /* Update modification times of quota files when userspace can
5047 * start looking at them */ 4980 * start looking at them */
5048 handle = ext4_journal_start(inode, 1); 4981 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5049 if (IS_ERR(handle)) 4982 if (IS_ERR(handle))
5050 goto out; 4983 goto out;
5051 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4984 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a91ebc2b66f..3a120b277240 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -549,7 +549,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
549 error = ext4_handle_dirty_xattr_block(handle, inode, bh); 549 error = ext4_handle_dirty_xattr_block(handle, inode, bh);
550 if (IS_SYNC(inode)) 550 if (IS_SYNC(inode))
551 ext4_handle_sync(handle); 551 ext4_handle_sync(handle);
552 dquot_free_block(inode, 1); 552 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
553 ea_bdebug(bh, "refcount now=%d; releasing", 553 ea_bdebug(bh, "refcount now=%d; releasing",
554 le32_to_cpu(BHDR(bh)->h_refcount)); 554 le32_to_cpu(BHDR(bh)->h_refcount));
555 } 555 }
@@ -832,7 +832,8 @@ inserted:
832 else { 832 else {
833 /* The old block is released after updating 833 /* The old block is released after updating
834 the inode. */ 834 the inode. */
835 error = dquot_alloc_block(inode, 1); 835 error = dquot_alloc_block(inode,
836 EXT4_C2B(EXT4_SB(sb), 1));
836 if (error) 837 if (error)
837 goto cleanup; 838 goto cleanup;
838 error = ext4_journal_get_write_access(handle, 839 error = ext4_journal_get_write_access(handle,
@@ -886,17 +887,18 @@ inserted:
886 (unsigned long long)block); 887 (unsigned long long)block);
887 888
888 new_bh = sb_getblk(sb, block); 889 new_bh = sb_getblk(sb, block);
889 if (!new_bh) { 890 if (unlikely(!new_bh)) {
891 error = -ENOMEM;
890getblk_failed: 892getblk_failed:
891 ext4_free_blocks(handle, inode, NULL, block, 1, 893 ext4_free_blocks(handle, inode, NULL, block, 1,
892 EXT4_FREE_BLOCKS_METADATA); 894 EXT4_FREE_BLOCKS_METADATA);
893 error = -EIO;
894 goto cleanup; 895 goto cleanup;
895 } 896 }
896 lock_buffer(new_bh); 897 lock_buffer(new_bh);
897 error = ext4_journal_get_create_access(handle, new_bh); 898 error = ext4_journal_get_create_access(handle, new_bh);
898 if (error) { 899 if (error) {
899 unlock_buffer(new_bh); 900 unlock_buffer(new_bh);
901 error = -EIO;
900 goto getblk_failed; 902 goto getblk_failed;
901 } 903 }
902 memcpy(new_bh->b_data, s->base, new_bh->b_size); 904 memcpy(new_bh->b_data, s->base, new_bh->b_size);
@@ -928,7 +930,7 @@ cleanup:
928 return error; 930 return error;
929 931
930cleanup_dquot: 932cleanup_dquot:
931 dquot_free_block(inode, 1); 933 dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
932 goto cleanup; 934 goto cleanup;
933 935
934bad_block: 936bad_block:
@@ -1164,17 +1166,10 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1164{ 1166{
1165 handle_t *handle; 1167 handle_t *handle;
1166 int error, retries = 0; 1168 int error, retries = 0;
1167 int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); 1169 int credits = ext4_jbd2_credits_xattr(inode);
1168 1170
1169retry: 1171retry:
1170 /* 1172 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1171 * In case of inline data, we may push out the data to a block,
1172 * So reserve the journal space first.
1173 */
1174 if (ext4_has_inline_data(inode))
1175 credits += ext4_writepage_trans_blocks(inode) + 1;
1176
1177 handle = ext4_journal_start(inode, credits);
1178 if (IS_ERR(handle)) { 1173 if (IS_ERR(handle)) {
1179 error = PTR_ERR(handle); 1174 error = PTR_ERR(handle);
1180 } else { 1175 } else {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 69eda787a96a..aa25deb5c6cd 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -125,74 +125,6 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
125 struct ext4_xattr_info *i, 125 struct ext4_xattr_info *i,
126 struct ext4_xattr_ibody_find *is); 126 struct ext4_xattr_ibody_find *is);
127 127
128extern int ext4_has_inline_data(struct inode *inode);
129extern int ext4_get_inline_size(struct inode *inode);
130extern int ext4_get_max_inline_size(struct inode *inode);
131extern int ext4_find_inline_data_nolock(struct inode *inode);
132extern void ext4_write_inline_data(struct inode *inode,
133 struct ext4_iloc *iloc,
134 void *buffer, loff_t pos,
135 unsigned int len);
136extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
137 unsigned int len);
138extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
139 unsigned int len);
140extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
141
142extern int ext4_readpage_inline(struct inode *inode, struct page *page);
143extern int ext4_try_to_write_inline_data(struct address_space *mapping,
144 struct inode *inode,
145 loff_t pos, unsigned len,
146 unsigned flags,
147 struct page **pagep);
148extern int ext4_write_inline_data_end(struct inode *inode,
149 loff_t pos, unsigned len,
150 unsigned copied,
151 struct page *page);
152extern struct buffer_head *
153ext4_journalled_write_inline_data(struct inode *inode,
154 unsigned len,
155 struct page *page);
156extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
157 struct inode *inode,
158 loff_t pos, unsigned len,
159 unsigned flags,
160 struct page **pagep,
161 void **fsdata);
162extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
163 unsigned len, unsigned copied,
164 struct page *page);
165extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
166 struct inode *inode);
167extern int ext4_try_create_inline_dir(handle_t *handle,
168 struct inode *parent,
169 struct inode *inode);
170extern int ext4_read_inline_dir(struct file *filp,
171 void *dirent, filldir_t filldir,
172 int *has_inline_data);
173extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
174 const struct qstr *d_name,
175 struct ext4_dir_entry_2 **res_dir,
176 int *has_inline_data);
177extern int ext4_delete_inline_entry(handle_t *handle,
178 struct inode *dir,
179 struct ext4_dir_entry_2 *de_del,
180 struct buffer_head *bh,
181 int *has_inline_data);
182extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
183extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
184 struct ext4_dir_entry_2 **parent_de,
185 int *retval);
186extern int ext4_inline_data_fiemap(struct inode *inode,
187 struct fiemap_extent_info *fieinfo,
188 int *has_inline);
189extern int ext4_try_to_evict_inline_data(handle_t *handle,
190 struct inode *inode,
191 int needed);
192extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
193
194extern int ext4_convert_inline_data(struct inode *inode);
195
196#ifdef CONFIG_EXT4_FS_SECURITY 128#ifdef CONFIG_EXT4_FS_SECURITY
197extern int ext4_init_security(handle_t *handle, struct inode *inode, 129extern int ext4_init_security(handle_t *handle, struct inode *inode,
198 struct inode *dir, const struct qstr *qstr); 130 struct inode *dir, const struct qstr *qstr);