Diffstat (limited to 'fs/ext4')
-rw-r--r--   fs/ext4/balloc.c          |   14
-rw-r--r--   fs/ext4/ext4.h            |  187
-rw-r--r--   fs/ext4/ext4_jbd2.c       |   58
-rw-r--r--   fs/ext4/ext4_jbd2.h       |   29
-rw-r--r--   fs/ext4/extents.c         |  193
-rw-r--r--   fs/ext4/extents_status.c  |   75
-rw-r--r--   fs/ext4/extents_status.h  |    5
-rw-r--r--   fs/ext4/file.c            |   14
-rw-r--r--   fs/ext4/fsync.c           |   52
-rw-r--r--   fs/ext4/ialloc.c          |    3
-rw-r--r--   fs/ext4/indirect.c        |   40
-rw-r--r--   fs/ext4/inline.c          |    4
-rw-r--r--   fs/ext4/inode.c           | 1751
-rw-r--r--   fs/ext4/mballoc.c         |   21
-rw-r--r--   fs/ext4/move_extent.c     |    3
-rw-r--r--   fs/ext4/namei.c           |    7
-rw-r--r--   fs/ext4/page-io.c         |  325
-rw-r--r--   fs/ext4/resize.c          |   24
-rw-r--r--   fs/ext4/super.c           |  155
19 files changed, 1525 insertions, 1435 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d0f13eada0ed..58339393fa6e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -682,11 +682,15 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
 
 static inline int test_root(ext4_group_t a, int b)
 {
-	int num = b;
-
-	while (a > num)
-		num *= b;
-	return num == a;
+	while (1) {
+		if (a < b)
+			return 0;
+		if (a == b)
+			return 1;
+		if ((a % b) != 0)
+			return 0;
+		a = a / b;
+	}
 }
 
 static int ext4_group_sparse(ext4_group_t group)
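Note: the rewritten test_root() decides whether group number a is a power of b by repeated division instead of repeated multiplication, so it cannot overflow the way the old loop could (once num wrapped around, "a > num" might never become false). A user-space sketch of the same loop, with a throwaway main() added purely for illustration:

#include <stdio.h>

/* Same algorithm as the new kernel test_root(): is a == b^k for some k >= 1? */
static int test_root(unsigned int a, int b)
{
	while (1) {
		if (a < b)
			return 0;
		if (a == b)
			return 1;
		if ((a % b) != 0)
			return 0;
		a = a / b;
	}
}

int main(void)
{
	/* 49 = 7^2 passes the root test; 50 does not */
	printf("test_root(49, 7) = %d\n", test_root(49, 7));
	printf("test_root(50, 7) = %d\n", test_root(50, 7));
	return 0;
}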
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4af03ea84aa3..b577e45425b0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,38 +177,28 @@ struct ext4_map_blocks {
 };
 
 /*
- * For delayed allocation tracking
- */
-struct mpage_da_data {
-	struct inode *inode;
-	sector_t b_blocknr;		/* start block number of extent */
-	size_t b_size;			/* size of extent */
-	unsigned long b_state;		/* state of the extent */
-	unsigned long first_page, next_page;	/* extent of pages */
-	struct writeback_control *wbc;
-	int io_done;
-	int pages_written;
-	int retval;
-};
-
-/*
  * Flags for ext4_io_end->flags
  */
 #define	EXT4_IO_END_UNWRITTEN	0x0001
-#define EXT4_IO_END_ERROR	0x0002
-#define EXT4_IO_END_DIRECT	0x0004
+#define EXT4_IO_END_DIRECT	0x0002
 
 /*
- * For converting uninitialized extents on a work queue.
+ * For converting uninitialized extents on a work queue. 'handle' is used for
+ * buffered writeback.
  */
 typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished IO list */
+	handle_t		*handle;	/* handle reserved for extent
+						 * conversion */
 	struct inode		*inode;		/* file being written to */
+	struct bio		*bio;		/* Linked list of completed
+						 * bios covering the extent */
 	unsigned int		flag;		/* unwritten or not */
 	loff_t			offset;		/* offset in the file */
 	ssize_t			size;		/* size of the extent */
 	struct kiocb		*iocb;		/* iocb struct for AIO */
 	int			result;		/* error value for AIO */
+	atomic_t		count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -581,11 +571,6 @@ enum {
 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020
 
 /*
- * Flags used by ext4_discard_partial_page_buffers
- */
-#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED	0x0001
-
-/*
  * ioctl commands
  */
 #define	EXT4_IOC_GETFLAGS		FS_IOC_GETFLAGS
@@ -879,6 +864,7 @@ struct ext4_inode_info {
 	rwlock_t i_es_lock;
 	struct list_head i_es_lru;
 	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
+	unsigned long i_touch_when;	/* jiffies of last accessing */
 
 	/* ialloc */
 	ext4_group_t	i_last_alloc_group;
@@ -903,12 +889,22 @@ struct ext4_inode_info {
 	qsize_t i_reserved_quota;
 #endif
 
-	/* completed IOs that might need unwritten extents handling */
-	struct list_head i_completed_io_list;
+	/* Lock protecting lists below */
 	spinlock_t i_completed_io_lock;
+	/*
+	 * Completed IOs that need unwritten extents handling and have
+	 * transaction reserved
+	 */
+	struct list_head i_rsv_conversion_list;
+	/*
+	 * Completed IOs that need unwritten extents handling and don't have
+	 * transaction reserved
+	 */
+	struct list_head i_unrsv_conversion_list;
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 	atomic_t i_unwritten;	/* Nr. of inflight conversions pending */
-	struct work_struct i_unwritten_work;	/* deferred extent conversion */
+	struct work_struct i_rsv_conversion_work;
+	struct work_struct i_unrsv_conversion_work;
 
 	spinlock_t i_block_reservation_lock;
 
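Note: the single i_completed_io_list is gone; completed io_ends are now segregated by whether their extent conversion already has a journal transaction reserved (buffered writeback) or must start its own (direct IO). A hypothetical routing sketch under that assumption; the real queueing code lives in fs/ext4/page-io.c, which is not part of this section, and the per-sb workqueues used here are introduced a few hunks below:

/*
 * Hypothetical sketch only: route a finished io_end to the matching list
 * and kick the matching worker. The real page-io.c logic differs in detail.
 */
static void queue_io_end(struct ext4_inode_info *ei, ext4_io_end_t *io_end)
{
	struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
	unsigned long flags;

	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	if (io_end->handle) {
		/* buffered writeback: a transaction was reserved up front */
		list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
		queue_work(sbi->rsv_conversion_wq, &ei->i_rsv_conversion_work);
	} else {
		/* direct IO: conversion must start its own transaction */
		list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list);
		queue_work(sbi->unrsv_conversion_wq,
			   &ei->i_unrsv_conversion_work);
	}
	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
}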
@@ -1245,7 +1241,6 @@ struct ext4_sb_info {
 	unsigned int s_mb_stats;
 	unsigned int s_mb_order2_reqs;
 	unsigned int s_mb_group_prealloc;
-	unsigned int s_max_writeback_mb_bump;
 	unsigned int s_max_dir_size_kb;
 	/* where last allocation was done - for stream allocation */
 	unsigned long s_mb_last_group;
@@ -1281,8 +1276,10 @@ struct ext4_sb_info {
 	struct flex_groups *s_flex_groups;
 	ext4_group_t s_flex_groups_allocated;
 
-	/* workqueue for dio unwritten */
-	struct workqueue_struct *dio_unwritten_wq;
+	/* workqueue for unreserved extent convertions (dio) */
+	struct workqueue_struct *unrsv_conversion_wq;
+	/* workqueue for reserved extent conversions (buffered io) */
+	struct workqueue_struct *rsv_conversion_wq;
 
 	/* timer for periodic error stats printing */
 	struct timer_list s_err_report;
@@ -1307,6 +1304,7 @@ struct ext4_sb_info {
 	/* Reclaim extents from extent status tree */
 	struct shrinker s_es_shrinker;
 	struct list_head s_es_lru;
+	unsigned long s_es_last_sorted;
 	struct percpu_counter s_extent_cache_cnt;
 	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 };
@@ -1342,6 +1340,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 					       struct ext4_io_end *io_end)
 {
 	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		/* Writeback has to have coversion transaction reserved */
+		WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
+			!(io_end->flag & EXT4_IO_END_DIRECT));
 		io_end->flag |= EXT4_IO_END_UNWRITTEN;
 		atomic_inc(&EXT4_I(inode)->i_unwritten);
 	}
@@ -1999,7 +2000,6 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
 
 /* fsync.c */
 extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
-extern int ext4_flush_unwritten_io(struct inode *);
 
 /* hash.c */
 extern int ext4fs_dirhash(const char *name, int len, struct
@@ -2088,7 +2088,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate(struct inode *);
-extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
+extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
@@ -2096,9 +2096,12 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_discard_partial_page_buffers(handle_t *handle,
-		struct address_space *mapping, loff_t from,
-		loff_t length, int flags);
+extern int ext4_block_truncate_page(handle_t *handle,
+		struct address_space *mapping, loff_t from);
+extern int ext4_block_zero_page_range(handle_t *handle,
+		struct address_space *mapping, loff_t from, loff_t length);
+extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
+			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -2111,7 +2114,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 				const struct iovec *iov, loff_t offset,
 				unsigned long nr_segs);
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
-extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
+extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
 extern void ext4_ind_truncate(handle_t *, struct inode *inode);
 extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
 				 ext4_lblk_t first, ext4_lblk_t stop);
@@ -2166,42 +2169,96 @@ extern int ext4_alloc_flex_bg_array(struct super_block *sb,
 				    ext4_group_t ngroup);
 extern const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16]);
+
 extern __printf(4, 5)
 void __ext4_error(struct super_block *, const char *, unsigned int,
 		  const char *, ...);
-#define ext4_error(sb, message...)	__ext4_error(sb, __func__,	\
-						     __LINE__, ## message)
 extern __printf(5, 6)
-void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
+void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
 		      const char *, ...);
 extern __printf(5, 6)
-void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
+void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
 		     const char *, ...);
 extern void __ext4_std_error(struct super_block *, const char *,
 			     unsigned int, int);
 extern __printf(4, 5)
 void __ext4_abort(struct super_block *, const char *, unsigned int,
 		  const char *, ...);
-#define ext4_abort(sb, message...)	__ext4_abort(sb, __func__, \
-				       __LINE__, ## message)
 extern __printf(4, 5)
 void __ext4_warning(struct super_block *, const char *, unsigned int,
 		    const char *, ...);
-#define ext4_warning(sb, message...)	__ext4_warning(sb, __func__, \
-						       __LINE__, ## message)
 extern __printf(3, 4)
-void ext4_msg(struct super_block *, const char *, const char *, ...);
+void __ext4_msg(struct super_block *, const char *, const char *, ...);
 extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
 			   const char *, unsigned int, const char *);
-#define dump_mmp_msg(sb, mmp, msg)	__dump_mmp_msg(sb, mmp, __func__, \
-						       __LINE__, msg)
 extern __printf(7, 8)
 void __ext4_grp_locked_error(const char *, unsigned int,
 			     struct super_block *, ext4_group_t,
 			     unsigned long, ext4_fsblk_t,
 			     const char *, ...);
-#define ext4_grp_locked_error(sb, grp, message...) \
-	__ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
+
+#ifdef CONFIG_PRINTK
+
+#define ext4_error_inode(inode, func, line, block, fmt, ...)		\
+	__ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__)
+#define ext4_error_file(file, func, line, block, fmt, ...)		\
+	__ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__)
+#define ext4_error(sb, fmt, ...)					\
+	__ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_abort(sb, fmt, ...)					\
+	__ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_warning(sb, fmt, ...)					\
+	__ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__)
+#define ext4_msg(sb, level, fmt, ...)					\
+	__ext4_msg(sb, level, fmt, ##__VA_ARGS__)
+#define dump_mmp_msg(sb, mmp, msg)					\
+	__dump_mmp_msg(sb, mmp, __func__, __LINE__, msg)
+#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...)		\
+	__ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \
+				fmt, ##__VA_ARGS__)
+
+#else
+
+#define ext4_error_inode(inode, func, line, block, fmt, ...)		\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_error_inode(inode, "", 0, block, " ");			\
+} while (0)
+#define ext4_error_file(file, func, line, block, fmt, ...)		\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_error_file(file, "", 0, block, " ");			\
+} while (0)
+#define ext4_error(sb, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_error(sb, "", 0, " ");					\
+} while (0)
+#define ext4_abort(sb, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_abort(sb, "", 0, " ");					\
+} while (0)
+#define ext4_warning(sb, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_warning(sb, "", 0, " ");					\
+} while (0)
+#define ext4_msg(sb, level, fmt, ...)					\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_msg(sb, "", " ");					\
+} while (0)
+#define dump_mmp_msg(sb, mmp, msg)					\
+	__dump_mmp_msg(sb, mmp, "", 0, "")
+#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...)		\
+do {									\
+	no_printk(fmt, ##__VA_ARGS__);					\
+	__ext4_grp_locked_error("", 0, sb, grp, ino, block, " ");	\
+} while (0)
+
+#endif
+
 extern void ext4_update_dynamic_rev(struct super_block *sb);
 extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
 				       __u32 compat);
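Note: when CONFIG_PRINTK is disabled, the replacement macros still feed the format string through no_printk(), which compiles away to nothing yet keeps printf-style type checking of the arguments, so mismatched format specifiers are caught in both configurations. A runnable user-space stand-in for the pattern (this no_printk() is a local re-implementation, not the kernel's):

#include <stdio.h>

/* Evaluates to nothing at run time, but the format attribute makes the
 * compiler type-check fmt against the arguments at compile time. */
static inline __attribute__((format(printf, 1, 2)))
int no_printk(const char *fmt, ...)
{
	return 0;
}

#define my_warning(sb, fmt, ...)		\
do {						\
	(void)(sb);				\
	no_printk(fmt, ##__VA_ARGS__);		\
} while (0)

int main(void)
{
	const char *sb = "sb0";

	my_warning(sb, "free blocks %d\n", 42);	/* checked, never printed */
	/* my_warning(sb, "free blocks %d\n", "x"); would warn at compile time */
	return 0;
}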
@@ -2312,6 +2369,7 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
 {
 	 struct ext4_group_info ***grp_info;
 	 long indexv, indexh;
+	 BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
 	 grp_info = EXT4_SB(sb)->s_group_info;
 	 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
 	 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
@@ -2598,8 +2656,7 @@ struct ext4_extent;
 
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
-extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
-				       int chunk);
+extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			       struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(handle_t *, struct inode *);
@@ -2609,8 +2666,8 @@ extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
 			  loff_t len);
-extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-			  ssize_t len);
+extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
+					  loff_t offset, ssize_t len);
 extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map, int flags);
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
@@ -2650,12 +2707,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
-extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
-extern void ext4_ioend_shutdown(struct inode *);
-extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern void ext4_end_io_work(struct work_struct *work);
+extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
+extern int ext4_put_io_end(ext4_io_end_t *io_end);
+extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
+extern void ext4_io_submit_init(struct ext4_io_submit *io,
+				struct writeback_control *wbc);
+extern void ext4_end_io_rsv_work(struct work_struct *work);
+extern void ext4_end_io_unrsv_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
@@ -2668,20 +2728,17 @@ extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp);
 extern int ext4_mmp_csum_verify(struct super_block *sb,
 				struct mmp_struct *mmp);
 
-/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
+/*
+ * Note that these flags will never ever appear in a buffer_head's state flag.
+ * See EXT4_MAP_... to see where this is used.
+ */
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
 	 = BH_JBDPrivateStart,
 	BH_AllocFromCluster,	/* allocated blocks were part of already
-				 * allocated cluster. Note that this flag will
-				 * never, ever appear in a buffer_head's state
-				 * flag. See EXT4_MAP_FROM_CLUSTER to see where
-				 * this is used. */
+				 * allocated cluster. */
 };
 
-BUFFER_FNS(Uninit, uninit)
-TAS_BUFFER_FNS(Uninit, uninit)
-
 /*
  * Add new method to test whether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
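Note: ext4_io_end_t now carries "atomic_t count" together with the get/put declarations above, so the submission path and bio completion can each hold a reference without racing over who frees the structure. A simplified guess at the pair implied by those declarations; the real page-io.c versions also drop io_end->handle and defer conversion to the new work lists, and ext4_release_io_end() is a hypothetical free helper:

/* Sketch only, assuming refcount semantics from the declarations above. */
ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
	atomic_inc(&io_end->count);
	return io_end;
}

int ext4_put_io_end(ext4_io_end_t *io_end)
{
	int err = 0;

	if (atomic_dec_and_test(&io_end->count)) {
		if (io_end->flag & EXT4_IO_END_UNWRITTEN)
			err = ext4_convert_unwritten_extents(io_end->handle,
					io_end->inode, io_end->offset,
					io_end->size);
		ext4_release_io_end(io_end);	/* hypothetical free helper */
	}
	return err;
}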
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 451eb4045330..72a3600aedbd 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -38,31 +38,43 @@ static void ext4_put_nojournal(handle_t *handle)
 /*
  * Wrappers for jbd2_journal_start/end.
  */
-handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
-				  int type, int nblocks)
+static int ext4_journal_check_start(struct super_block *sb)
 {
 	journal_t *journal;
 
 	might_sleep();
-
-	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
 	if (sb->s_flags & MS_RDONLY)
-		return ERR_PTR(-EROFS);
-
+		return -EROFS;
 	WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
 	journal = EXT4_SB(sb)->s_journal;
-	if (!journal)
-		return ext4_get_nojournal();
 	/*
 	 * Special case here: if the journal has aborted behind our
 	 * backs (eg. EIO in the commit thread), then we still need to
 	 * take the FS itself readonly cleanly.
 	 */
-	if (is_journal_aborted(journal)) {
+	if (journal && is_journal_aborted(journal)) {
 		ext4_abort(sb, "Detected aborted journal");
-		return ERR_PTR(-EROFS);
+		return -EROFS;
 	}
-	return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line);
+	return 0;
+}
+
+handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
+				  int type, int blocks, int rsv_blocks)
+{
+	journal_t *journal;
+	int err;
+
+	trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
+	err = ext4_journal_check_start(sb);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	journal = EXT4_SB(sb)->s_journal;
+	if (!journal)
+		return ext4_get_nojournal();
+	return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
+				   type, line);
 }
 
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
@@ -86,6 +98,30 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
 	return err;
 }
 
+handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
+					int type)
+{
+	struct super_block *sb;
+	int err;
+
+	if (!ext4_handle_valid(handle))
+		return ext4_get_nojournal();
+
+	sb = handle->h_journal->j_private;
+	trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
+					  _RET_IP_);
+	err = ext4_journal_check_start(sb);
+	if (err < 0) {
+		jbd2_journal_free_reserved(handle);
+		return ERR_PTR(err);
+	}
+
+	err = jbd2_journal_start_reserved(handle, type, line);
+	if (err < 0)
+		return ERR_PTR(err);
+	return handle;
+}
+
 void ext4_journal_abort_handle(const char *caller, unsigned int line,
 			       const char *err_fn, struct buffer_head *bh,
 			       handle_t *handle, int err)
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index c8c6885406db..2877258d9497 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
 #define EXT4_HT_MIGRATE          8
 #define EXT4_HT_MOVE_EXTENTS     9
 #define EXT4_HT_XATTR           10
-#define EXT4_HT_MAX             11
+#define EXT4_HT_EXT_CONVERT     11
+#define EXT4_HT_MAX             12
 
 /**
  * struct ext4_journal_cb_entry - Base structure for callback information.
@@ -265,7 +266,7 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
 	__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
 
 handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
-				  int type, int nblocks);
+				  int type, int blocks, int rsv_blocks);
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
 
 #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -300,21 +301,37 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
 }
 
 #define ext4_journal_start_sb(sb, type, nblocks)			\
-	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks))
+	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)
 
 #define ext4_journal_start(inode, type, nblocks)			\
-	__ext4_journal_start((inode), __LINE__, (type), (nblocks))
+	__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)
+
+#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \
+	__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))
 
 static inline handle_t *__ext4_journal_start(struct inode *inode,
 					     unsigned int line, int type,
-					     int nblocks)
+					     int blocks, int rsv_blocks)
 {
-	return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks);
+	return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
+				       rsv_blocks);
 }
 
 #define ext4_journal_stop(handle) \
 	__ext4_journal_stop(__func__, __LINE__, (handle))
 
+#define ext4_journal_start_reserved(handle, type) \
+	__ext4_journal_start_reserved((handle), __LINE__, (type))
+
+handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
+					int type);
+
+static inline void ext4_journal_free_reserved(handle_t *handle)
+{
+	if (ext4_handle_valid(handle))
+		jbd2_journal_free_reserved(handle);
+}
+
 static inline handle_t *ext4_journal_current_handle(void)
 {
 	return journal_current_handle();
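Note: the intended lifecycle is two-phase: reserve credits up front while starting the handle for the write itself, then later turn the reservation into a running handle from the IO-completion path, where blocking on a full journal could otherwise deadlock. An illustrative sketch; the variable names (needed, rsv_credits, io_end) are invented, and EXT4_HT_WRITE_PAGE is one of the pre-existing handle types in this header:

/* Phase 1, at write time: start a running handle and reserve rsv_credits
 * on the side for the later unwritten-extent conversion. */
handle = ext4_journal_start_with_reserve(inode, EXT4_HT_WRITE_PAGE,
					 needed, rsv_credits);
if (IS_ERR(handle))
	return PTR_ERR(handle);

/* Phase 2, at IO completion: hand the reserved handle to the conversion
 * helper, which activates it through ext4_journal_start_reserved() under
 * the new EXT4_HT_EXT_CONVERT type (see the extents.c hunk below). */
err = ext4_convert_unwritten_extents(io_end->handle, io_end->inode,
				     io_end->offset, io_end->size);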
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc0f1910b9cf..7097b0f680e6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2125,7 +2125,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 		next_del = ext4_find_delayed_extent(inode, &es);
 		if (!exists && next_del) {
 			exists = 1;
-			flags |= FIEMAP_EXTENT_DELALLOC;
+			flags |= (FIEMAP_EXTENT_DELALLOC |
+				  FIEMAP_EXTENT_UNKNOWN);
 		}
 		up_read(&EXT4_I(inode)->i_data_sem);
 
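Note: a delayed-allocation extent has no physical location yet, so reporting it through fiemap without FIEMAP_EXTENT_UNKNOWN understated that fact. With this change a reader can detect the case directly; in this sketch fe is assumed to point at one struct fiemap_extent returned by the FS_IOC_FIEMAP ioctl:

/* Userspace sketch: "data exists but has no disk location yet". */
if ((fe->fe_flags & FIEMAP_EXTENT_DELALLOC) &&
    (fe->fe_flags & FIEMAP_EXTENT_UNKNOWN))
	printf("logical %llu: delayed allocation, no block yet\n",
	       (unsigned long long)fe->fe_logical);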
@@ -2328,17 +2329,15 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
 }
 
 /*
- * How many index/leaf blocks need to change/allocate to modify nrblocks?
+ * How many index/leaf blocks need to change/allocate to add @extents extents?
  *
- * if nrblocks are fit in a single extent (chunk flag is 1), then
- * in the worse case, each tree level index/leaf need to be changed
- * if the tree split due to insert a new extent, then the old tree
- * index/leaf need to be updated too
+ * If we add a single extent, then in the worse case, each tree level
+ * index/leaf need to be changed in case of the tree split.
  *
- * If the nrblocks are discontiguous, they could cause
- * the whole tree split more than once, but this is really rare.
+ * If more extents are inserted, they could cause the whole tree split more
+ * than once, but this is really rare.
  */
-int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
 {
 	int index;
 	int depth;
@@ -2349,7 +2348,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 
 	depth = ext_depth(inode);
 
-	if (chunk)
+	if (extents <= 1)
 		index = depth * 2;
 	else
 		index = depth * 3;
@@ -2357,20 +2356,24 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 	return index;
 }
 
+static inline int get_default_free_blocks_flags(struct inode *inode)
+{
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+	else if (ext4_should_journal_data(inode))
+		return EXT4_FREE_BLOCKS_FORGET;
+	return 0;
+}
+
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 			      struct ext4_extent *ex,
-			      ext4_fsblk_t *partial_cluster,
+			      long long *partial_cluster,
 			      ext4_lblk_t from, ext4_lblk_t to)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
 	ext4_fsblk_t pblk;
-	int flags = 0;
-
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
-	else if (ext4_should_journal_data(inode))
-		flags |= EXT4_FREE_BLOCKS_FORGET;
+	int flags = get_default_free_blocks_flags(inode);
 
 	/*
 	 * For bigalloc file systems, we never free a partial cluster
@@ -2388,7 +2391,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	 * partial cluster here.
 	 */
 	pblk = ext4_ext_pblock(ex) + ee_len - 1;
-	if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
+	if ((*partial_cluster > 0) &&
+	    (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
 		ext4_free_blocks(handle, inode, NULL,
 				 EXT4_C2B(sbi, *partial_cluster),
 				 sbi->s_cluster_ratio, flags);
@@ -2414,41 +2418,46 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		/* tail removal */
 		ext4_lblk_t num;
+		unsigned int unaligned;
 
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
 		pblk = ext4_ext_pblock(ex) + ee_len - num;
-		ext_debug("free last %u blocks starting %llu\n", num, pblk);
+		/*
+		 * Usually we want to free partial cluster at the end of the
+		 * extent, except for the situation when the cluster is still
+		 * used by any other extent (partial_cluster is negative).
+		 */
+		if (*partial_cluster < 0 &&
+		    -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
+			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
+
+		ext_debug("free last %u blocks starting %llu partial %lld\n",
+			  num, pblk, *partial_cluster);
 		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
 		/*
 		 * If the block range to be freed didn't start at the
 		 * beginning of a cluster, and we removed the entire
-		 * extent, save the partial cluster here, since we
-		 * might need to delete if we determine that the
-		 * truncate operation has removed all of the blocks in
-		 * the cluster.
+		 * extent and the cluster is not used by any other extent,
+		 * save the partial cluster here, since we might need to
+		 * delete if we determine that the truncate operation has
+		 * removed all of the blocks in the cluster.
+		 *
+		 * On the other hand, if we did not manage to free the whole
+		 * extent, we have to mark the cluster as used (store negative
+		 * cluster number in partial_cluster).
 		 */
-		if (pblk & (sbi->s_cluster_ratio - 1) &&
-		    (ee_len == num))
+		unaligned = pblk & (sbi->s_cluster_ratio - 1);
+		if (unaligned && (ee_len == num) &&
+		    (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
 			*partial_cluster = EXT4_B2C(sbi, pblk);
-		else
+		else if (unaligned)
+			*partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
+		else if (*partial_cluster > 0)
 			*partial_cluster = 0;
-	} else if (from == le32_to_cpu(ex->ee_block)
-		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
-		/* head removal */
-		ext4_lblk_t num;
-		ext4_fsblk_t start;
-
-		num = to - from;
-		start = ext4_ext_pblock(ex);
-
-		ext_debug("free first %u blocks starting %llu\n", num, start);
-		ext4_free_blocks(handle, inode, NULL, start, num, flags);
-
-	} else {
-		printk(KERN_INFO "strange request: removal(2) "
-		       "%u-%u from %u:%u\n",
-		       from, to, le32_to_cpu(ex->ee_block), ee_len);
-	}
+	} else
+		ext4_error(sbi->s_sb, "strange request: removal(2) "
+			   "%u-%u from %u:%u\n",
+			   from, to, le32_to_cpu(ex->ee_block), ee_len);
 	return 0;
 }
 
@@ -2461,12 +2470,16 @@
  * @handle: The journal handle
  * @inode:  The files inode
  * @path:   The path to the leaf
+ * @partial_cluster: The cluster which we'll have to free if all extents
+ *                   has been released from it. It gets negative in case
+ *                   that the cluster is still used.
  * @start:  The first block to remove
  * @end:   The last block to remove
  */
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
-		 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
+		 struct ext4_ext_path *path,
+		 long long *partial_cluster,
 		 ext4_lblk_t start, ext4_lblk_t end)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2479,6 +2492,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	unsigned short ex_ee_len;
 	unsigned uninitialized = 0;
 	struct ext4_extent *ex;
+	ext4_fsblk_t pblk;
 
 	/* the header must be checked already in ext4_ext_remove_space() */
 	ext_debug("truncate since %u in leaf to %u\n", start, end);
@@ -2490,7 +2504,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		return -EIO;
 	}
 	/* find where to start removing */
-	ex = EXT_LAST_EXTENT(eh);
+	ex = path[depth].p_ext;
+	if (!ex)
+		ex = EXT_LAST_EXTENT(eh);
 
 	ex_ee_block = le32_to_cpu(ex->ee_block);
 	ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2517,6 +2533,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 
 		/* If this extent is beyond the end of the hole, skip it */
 		if (end < ex_ee_block) {
+			/*
+			 * We're going to skip this extent and move to another,
+			 * so if this extent is not cluster aligned we have
+			 * to mark the current cluster as used to avoid
+			 * accidentally freeing it later on
+			 */
+			pblk = ext4_ext_pblock(ex);
+			if (pblk & (sbi->s_cluster_ratio - 1))
+				*partial_cluster =
+					-((long long)EXT4_B2C(sbi, pblk));
 			ex--;
 			ex_ee_block = le32_to_cpu(ex->ee_block);
 			ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2592,7 +2618,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 				sizeof(struct ext4_extent));
 		}
 		le16_add_cpu(&eh->eh_entries, -1);
-	} else
+	} else if (*partial_cluster > 0)
 		*partial_cluster = 0;
 
 	err = ext4_ext_dirty(handle, inode, path + depth);
@@ -2610,17 +2636,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		err = ext4_ext_correct_indexes(handle, inode, path);
 
 	/*
-	 * If there is still a entry in the leaf node, check to see if
-	 * it references the partial cluster.  This is the only place
-	 * where it could; if it doesn't, we can free the cluster.
+	 * Free the partial cluster only if the current extent does not
+	 * reference it. Otherwise we might free used cluster.
 	 */
-	if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
+	if (*partial_cluster > 0 &&
 	    (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
 	     *partial_cluster)) {
-		int flags = EXT4_FREE_BLOCKS_FORGET;
-
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-			flags |= EXT4_FREE_BLOCKS_METADATA;
+		int flags = get_default_free_blocks_flags(inode);
 
 		ext4_free_blocks(handle, inode, NULL,
 				 EXT4_C2B(sbi, *partial_cluster),
@@ -2664,7 +2686,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
 	struct ext4_ext_path *path = NULL;
-	ext4_fsblk_t partial_cluster = 0;
+	long long partial_cluster = 0;
 	handle_t *handle;
 	int i = 0, err = 0;
 
@@ -2676,7 +2698,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 		return PTR_ERR(handle);
 
 again:
-	trace_ext4_ext_remove_space(inode, start, depth);
+	trace_ext4_ext_remove_space(inode, start, end, depth);
 
 	/*
 	 * Check if we are removing extents inside the extent tree. If that
@@ -2844,17 +2866,14 @@ again:
 		}
 	}
 
-	trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster,
-			path->p_hdr->eh_entries);
+	trace_ext4_ext_remove_space_done(inode, start, end, depth,
+			partial_cluster, path->p_hdr->eh_entries);
 
 	/* If we still have something in the partial cluster and we have removed
 	 * even the first extent, then we should free the blocks in the partial
 	 * cluster as well. */
-	if (partial_cluster && path->p_hdr->eh_entries == 0) {
-		int flags = EXT4_FREE_BLOCKS_FORGET;
-
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-			flags |= EXT4_FREE_BLOCKS_METADATA;
+	if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
+		int flags = get_default_free_blocks_flags(inode);
 
 		ext4_free_blocks(handle, inode, NULL,
 				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
@@ -4363,7 +4382,7 @@ out2:
 	}
 
 out3:
-	trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated);
+	trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated);
 
 	return err ? err : allocated;
 }
@@ -4446,7 +4465,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		return -EOPNOTSUPP;
 
 	if (mode & FALLOC_FL_PUNCH_HOLE)
-		return ext4_punch_hole(file, offset, len);
+		return ext4_punch_hole(inode, offset, len);
 
 	ret = ext4_convert_inline_data(inode);
 	if (ret)
@@ -4548,10 +4567,9 @@ retry:
  * function, to convert the fallocated extents after IO is completed.
  * Returns 0 on success.
 */
-int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-				    ssize_t len)
+int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
+				   loff_t offset, ssize_t len)
 {
-	handle_t *handle;
 	unsigned int max_blocks;
 	int ret = 0;
 	int ret2 = 0;
@@ -4566,16 +4584,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 	max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
 		      map.m_lblk);
 	/*
-	 * credits to insert 1 extent into extent tree
+	 * This is somewhat ugly but the idea is clear: When transaction is
+	 * reserved, everything goes into it. Otherwise we rather start several
+	 * smaller transactions for conversion of each extent separately.
 	 */
-	credits = ext4_chunk_trans_blocks(inode, max_blocks);
+	if (handle) {
+		handle = ext4_journal_start_reserved(handle,
+						     EXT4_HT_EXT_CONVERT);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+		credits = 0;
+	} else {
+		/*
+		 * credits to insert 1 extent into extent tree
+		 */
+		credits = ext4_chunk_trans_blocks(inode, max_blocks);
+	}
 	while (ret >= 0 && ret < max_blocks) {
 		map.m_lblk += ret;
 		map.m_len = (max_blocks -= ret);
-		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			break;
+		if (credits) {
+			handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+						    credits);
+			if (IS_ERR(handle)) {
+				ret = PTR_ERR(handle);
+				break;
+			}
 		}
 		ret = ext4_map_blocks(handle, inode, &map,
 				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
@@ -4586,10 +4620,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 				   inode->i_ino, map.m_lblk,
 				   map.m_len, ret);
 		ext4_mark_inode_dirty(handle, inode);
-		ret2 = ext4_journal_stop(handle);
-		if (ret <= 0 || ret2 )
+		if (credits)
+			ret2 = ext4_journal_stop(handle);
+		if (ret <= 0 || ret2)
 			break;
 	}
+	if (!credits)
+		ret2 = ext4_journal_stop(handle);
 	return ret > 0 ? ret2 : ret;
 }
 
@@ -4659,7 +4696,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
 		error = ext4_get_inode_loc(inode, &iloc);
 		if (error)
 			return error;
-		physical = iloc.bh->b_blocknr << blockbits;
+		physical = (__u64)iloc.bh->b_blocknr << blockbits;
 		offset = EXT4_GOOD_OLD_INODE_SIZE +
 				EXT4_I(inode)->i_extra_isize;
 		physical += offset;
@@ -4667,7 +4704,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
 		flags |= FIEMAP_EXTENT_DATA_INLINE;
 		brelse(iloc.bh);
 	} else { /* external block */
-		physical = EXT4_I(inode)->i_file_acl << blockbits;
+		physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
 		length = inode->i_sb->s_blocksize;
 	}
 
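Note: the two (__u64) casts in ext4_xattr_fiemap() fix a 32-bit truncation: the left shift was performed in the width of the block-number type before being assigned to the 64-bit physical offset. A runnable demonstration of the failure mode, using unsigned int to force a 32-bit intermediate:

#include <stdio.h>

int main(void)
{
	unsigned int blocknr = 0x200000;	/* block number, 32-bit type */
	int blockbits = 12;			/* 4 KiB blocks */

	/* shift done in 32 bits: high bits are lost before the assignment */
	unsigned long long bad = blocknr << blockbits;
	/* widen first, then shift: the correct byte offset */
	unsigned long long good = (unsigned long long)blocknr << blockbits;

	printf("bad = %#llx, good = %#llx\n", bad, good);
	return 0;
}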
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e6941e622d31..ee018d5f397e 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * Ext4 extents status tree core functions. | 10 | * Ext4 extents status tree core functions. |
11 | */ | 11 | */ |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/list_sort.h> | ||
13 | #include "ext4.h" | 14 | #include "ext4.h" |
14 | #include "extents_status.h" | 15 | #include "extents_status.h" |
15 | #include "ext4_extents.h" | 16 | #include "ext4_extents.h" |
@@ -291,7 +292,6 @@ out: | |||
291 | 292 | ||
292 | read_unlock(&EXT4_I(inode)->i_es_lock); | 293 | read_unlock(&EXT4_I(inode)->i_es_lock); |
293 | 294 | ||
294 | ext4_es_lru_add(inode); | ||
295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
296 | } | 296 | } |
297 | 297 | ||
@@ -672,7 +672,6 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
672 | error: | 672 | error: |
673 | write_unlock(&EXT4_I(inode)->i_es_lock); | 673 | write_unlock(&EXT4_I(inode)->i_es_lock); |
674 | 674 | ||
675 | ext4_es_lru_add(inode); | ||
676 | ext4_es_print_tree(inode); | 675 | ext4_es_print_tree(inode); |
677 | 676 | ||
678 | return err; | 677 | return err; |
@@ -734,7 +733,6 @@ out: | |||
734 | 733 | ||
735 | read_unlock(&EXT4_I(inode)->i_es_lock); | 734 | read_unlock(&EXT4_I(inode)->i_es_lock); |
736 | 735 | ||
737 | ext4_es_lru_add(inode); | ||
738 | trace_ext4_es_lookup_extent_exit(inode, es, found); | 736 | trace_ext4_es_lookup_extent_exit(inode, es, found); |
739 | return found; | 737 | return found; |
740 | } | 738 | } |
@@ -878,12 +876,28 @@ int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
878 | EXTENT_STATUS_WRITTEN); | 876 | EXTENT_STATUS_WRITTEN); |
879 | } | 877 | } |
880 | 878 | ||
879 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | ||
880 | struct list_head *b) | ||
881 | { | ||
882 | struct ext4_inode_info *eia, *eib; | ||
883 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
884 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
885 | |||
886 | if (eia->i_touch_when == eib->i_touch_when) | ||
887 | return 0; | ||
888 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
889 | return 1; | ||
890 | else | ||
891 | return -1; | ||
892 | } | ||
893 | |||
881 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 894 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) |
882 | { | 895 | { |
883 | struct ext4_sb_info *sbi = container_of(shrink, | 896 | struct ext4_sb_info *sbi = container_of(shrink, |
884 | struct ext4_sb_info, s_es_shrinker); | 897 | struct ext4_sb_info, s_es_shrinker); |
885 | struct ext4_inode_info *ei; | 898 | struct ext4_inode_info *ei; |
886 | struct list_head *cur, *tmp, scanned; | 899 | struct list_head *cur, *tmp; |
900 | LIST_HEAD(skipped); | ||
887 | int nr_to_scan = sc->nr_to_scan; | 901 | int nr_to_scan = sc->nr_to_scan; |
888 | int ret, nr_shrunk = 0; | 902 | int ret, nr_shrunk = 0; |
889 | 903 | ||
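ext4_inode_touch_time_cmp() is an ordinary three-way comparator for list_sort(), with one subtlety: i_touch_when holds a jiffies timestamp, so the ordering must go through time_after(), whose signed-difference trick stays correct even when jiffies wraps around. A stand-alone model of the same comparison (time_after_model mirrors the kernel macro; the rest is illustrative):

	#include <stdio.h>
	#include <limits.h>

	/* Kernel-style time_after(): a is after b if (long)(b - a) < 0. */
	#define time_after_model(a, b)	((long)((b) - (a)) < 0)

	static int touch_time_cmp(unsigned long a, unsigned long b)
	{
		if (a == b)
			return 0;
		return time_after_model(a, b) ? 1 : -1;
	}

	int main(void)
	{
		unsigned long before_wrap = ULONG_MAX - 5;	/* just before wrap */
		unsigned long after_wrap = 10;			/* just after wrap */

		/* A raw > comparison misorders timestamps across the wrap... */
		printf("raw: %d\n", after_wrap > before_wrap ? 1 : -1);
		/* ...while the signed-difference form still sorts it as later. */
		printf("cmp: %d\n", touch_time_cmp(after_wrap, before_wrap));
		return 0;
	}

raw prints -1 (wrong) and cmp prints 1 (correct), which is why the comparator returns through time_after() rather than comparing the raw values.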
@@ -893,23 +907,41 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
893 | if (!nr_to_scan) | 907 | if (!nr_to_scan) |
894 | return ret; | 908 | return ret; |
895 | 909 | ||
896 | INIT_LIST_HEAD(&scanned); | ||
897 | |||
898 | spin_lock(&sbi->s_es_lru_lock); | 910 | spin_lock(&sbi->s_es_lru_lock); |
911 | |||
912 | /* | ||
913 | * If the inode at the head of the LRU list is newer than the | ||
914 | * last_sorted time, the list needs to be re-sorted. | ||
915 | */ | ||
916 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); | ||
917 | if (sbi->s_es_last_sorted < ei->i_touch_when) { | ||
918 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
919 | sbi->s_es_last_sorted = jiffies; | ||
920 | } | ||
921 | |||
899 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 922 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
900 | list_move_tail(cur, &scanned); | 923 | /* |
924 | * If we have already reclaimed all extents from the extent | ||
925 | * status tree, just stop the loop immediately. | ||
926 | */ | ||
927 | if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | ||
928 | break; | ||
901 | 929 | ||
902 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 930 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
903 | 931 | ||
904 | read_lock(&ei->i_es_lock); | 932 | /* Skip the inode that is newer than the last_sorted time */ |
905 | if (ei->i_es_lru_nr == 0) { | 933 | if (sbi->s_es_last_sorted < ei->i_touch_when) { |
906 | read_unlock(&ei->i_es_lock); | 934 | list_move_tail(cur, &skipped); |
907 | continue; | 935 | continue; |
908 | } | 936 | } |
909 | read_unlock(&ei->i_es_lock); | 937 | |
938 | if (ei->i_es_lru_nr == 0) | ||
939 | continue; | ||
910 | 940 | ||
911 | write_lock(&ei->i_es_lock); | 941 | write_lock(&ei->i_es_lock); |
912 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | 942 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); |
943 | if (ei->i_es_lru_nr == 0) | ||
944 | list_del_init(&ei->i_es_lru); | ||
913 | write_unlock(&ei->i_es_lock); | 945 | write_unlock(&ei->i_es_lock); |
914 | 946 | ||
915 | nr_shrunk += ret; | 947 | nr_shrunk += ret; |
@@ -917,7 +949,9 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
917 | if (nr_to_scan == 0) | 949 | if (nr_to_scan == 0) |
918 | break; | 950 | break; |
919 | } | 951 | } |
920 | list_splice_tail(&scanned, &sbi->s_es_lru); | 952 | |
953 | /* Move the newer inodes into the tail of the LRU list. */ | ||
954 | list_splice_tail(&skipped, &sbi->s_es_lru); | ||
921 | spin_unlock(&sbi->s_es_lru_lock); | 955 | spin_unlock(&sbi->s_es_lru_lock); |
922 | 956 | ||
923 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 957 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); |
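The net effect of the rewritten walk: the LRU is re-sorted (under s_es_lru_lock, via list_sort()) only when the head inode has been touched since s_es_last_sorted, inodes newer than the last sort are shunted onto a side list rather than scanned, and that side list is spliced back to the tail afterwards. A compact user-space model of the sort-then-skip logic, with the side-list splice elided and plain comparisons standing in for time_after() (names hypothetical):

	#include <stdio.h>
	#include <stdlib.h>

	struct node { unsigned long touch_when; int extents; };

	static int cmp_touch(const void *a, const void *b)
	{
		const struct node *na = a, *nb = b;

		if (na->touch_when == nb->touch_when)
			return 0;
		return na->touch_when > nb->touch_when ? 1 : -1;
	}

	int main(void)
	{
		struct node lru[] = {
			{ .touch_when = 40, .extents = 3 },
			{ .touch_when = 10, .extents = 2 },
			{ .touch_when = 60, .extents = 5 },
		};
		int n = sizeof(lru) / sizeof(lru[0]);
		unsigned long last_sorted = 20, now = 50;

		/* Re-sort only when the head was touched after the last sort. */
		if (lru[0].touch_when > last_sorted) {
			qsort(lru, n, sizeof(lru[0]), cmp_touch);
			last_sorted = now;
		}

		/* Reclaim oldest-first; skip anything touched since the sort. */
		for (int i = 0; i < n; i++) {
			if (lru[i].touch_when > last_sorted) {
				printf("skip (touched at %lu)\n", lru[i].touch_when);
				continue;
			}
			printf("reclaim %d extents (touched at %lu)\n",
			       lru[i].extents, lru[i].touch_when);
		}
		return 0;
	}

This keeps the common shrink pass linear while paying the O(n log n) sort only when the ordering has actually gone stale.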
@@ -925,21 +959,19 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
925 | return ret; | 959 | return ret; |
926 | } | 960 | } |
927 | 961 | ||
928 | void ext4_es_register_shrinker(struct super_block *sb) | 962 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) |
929 | { | 963 | { |
930 | struct ext4_sb_info *sbi; | ||
931 | |||
932 | sbi = EXT4_SB(sb); | ||
933 | INIT_LIST_HEAD(&sbi->s_es_lru); | 964 | INIT_LIST_HEAD(&sbi->s_es_lru); |
934 | spin_lock_init(&sbi->s_es_lru_lock); | 965 | spin_lock_init(&sbi->s_es_lru_lock); |
966 | sbi->s_es_last_sorted = 0; | ||
935 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | 967 | sbi->s_es_shrinker.shrink = ext4_es_shrink; |
936 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 968 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
937 | register_shrinker(&sbi->s_es_shrinker); | 969 | register_shrinker(&sbi->s_es_shrinker); |
938 | } | 970 | } |
939 | 971 | ||
940 | void ext4_es_unregister_shrinker(struct super_block *sb) | 972 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
941 | { | 973 | { |
942 | unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); | 974 | unregister_shrinker(&sbi->s_es_shrinker); |
943 | } | 975 | } |
944 | 976 | ||
945 | void ext4_es_lru_add(struct inode *inode) | 977 | void ext4_es_lru_add(struct inode *inode) |
@@ -947,11 +979,14 @@ void ext4_es_lru_add(struct inode *inode) | |||
947 | struct ext4_inode_info *ei = EXT4_I(inode); | 979 | struct ext4_inode_info *ei = EXT4_I(inode); |
948 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 980 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
949 | 981 | ||
982 | ei->i_touch_when = jiffies; | ||
983 | |||
984 | if (!list_empty(&ei->i_es_lru)) | ||
985 | return; | ||
986 | |||
950 | spin_lock(&sbi->s_es_lru_lock); | 987 | spin_lock(&sbi->s_es_lru_lock); |
951 | if (list_empty(&ei->i_es_lru)) | 988 | if (list_empty(&ei->i_es_lru)) |
952 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 989 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); |
953 | else | ||
954 | list_move_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
955 | spin_unlock(&sbi->s_es_lru_lock); | 990 | spin_unlock(&sbi->s_es_lru_lock); |
956 | } | 991 | } |
957 | 992 | ||
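ext4_es_lru_add() now stamps i_touch_when on every call but takes s_es_lru_lock only when the inode is not yet on the LRU: the unlocked list_empty() test is an optimistic fast path, repeated under the lock because another CPU may have inserted the inode in the meantime (the old move-to-tail is gone since ordering is now handled by the lazy sort above). The same check-lock-recheck shape in a runnable pthread sketch (all names hypothetical; the unlocked read is deliberate optimism, tolerable because the locked recheck makes insertion idempotent, analogous to what the kernel relies on for list_empty()):

	#include <stdio.h>
	#include <stdbool.h>
	#include <pthread.h>

	static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
	static bool on_lru;	/* stands in for !list_empty(&ei->i_es_lru) */
	static int insertions;

	static void lru_add(void)
	{
		if (on_lru)		/* unlocked fast path: already queued */
			return;

		pthread_mutex_lock(&lru_lock);
		if (!on_lru) {		/* recheck: we may have raced */
			on_lru = true;
			insertions++;
		}
		pthread_mutex_unlock(&lru_lock);
	}

	static void *worker(void *arg)
	{
		for (int i = 0; i < 100000; i++)
			lru_add();
		return NULL;
	}

	int main(void)
	{
		pthread_t t1, t2;

		pthread_create(&t1, NULL, worker, NULL);
		pthread_create(&t2, NULL, worker, NULL);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		printf("inserted %d time(s)\n", insertions);	/* always 1 */
		return 0;
	}

The worst a stale unlocked read can do is fall through to the locked path, where the recheck prevents a double insertion.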
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f740eb03b707..e936730cc5b0 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -39,6 +39,7 @@ | |||
39 | EXTENT_STATUS_DELAYED | \ | 39 | EXTENT_STATUS_DELAYED | \ |
40 | EXTENT_STATUS_HOLE) | 40 | EXTENT_STATUS_HOLE) |
41 | 41 | ||
42 | struct ext4_sb_info; | ||
42 | struct ext4_extent; | 43 | struct ext4_extent; |
43 | 44 | ||
44 | struct extent_status { | 45 | struct extent_status { |
@@ -119,8 +120,8 @@ static inline void ext4_es_store_status(struct extent_status *es, | |||
119 | es->es_pblk = block; | 120 | es->es_pblk = block; |
120 | } | 121 | } |
121 | 122 | ||
122 | extern void ext4_es_register_shrinker(struct super_block *sb); | 123 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
123 | extern void ext4_es_unregister_shrinker(struct super_block *sb); | 124 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
124 | extern void ext4_es_lru_add(struct inode *inode); | 125 | extern void ext4_es_lru_add(struct inode *inode); |
125 | extern void ext4_es_lru_del(struct inode *inode); | 126 | extern void ext4_es_lru_del(struct inode *inode); |
126 | 127 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b1b4d51b5d86..b19f0a457f32 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -312,7 +312,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, | |||
312 | blkbits = inode->i_sb->s_blocksize_bits; | 312 | blkbits = inode->i_sb->s_blocksize_bits; |
313 | startoff = *offset; | 313 | startoff = *offset; |
314 | lastoff = startoff; | 314 | lastoff = startoff; |
315 | endoff = (map->m_lblk + map->m_len) << blkbits; | 315 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; |
316 | 316 | ||
317 | index = startoff >> PAGE_CACHE_SHIFT; | 317 | index = startoff >> PAGE_CACHE_SHIFT; |
318 | end = endoff >> PAGE_CACHE_SHIFT; | 318 | end = endoff >> PAGE_CACHE_SHIFT; |
@@ -457,7 +457,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
457 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 457 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
459 | if (last != start) | 459 | if (last != start) |
460 | dataoff = last << blkbits; | 460 | dataoff = (loff_t)last << blkbits; |
461 | break; | 461 | break; |
462 | } | 462 | } |
463 | 463 | ||
@@ -468,7 +468,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
470 | if (last != start) | 470 | if (last != start) |
471 | dataoff = last << blkbits; | 471 | dataoff = (loff_t)last << blkbits; |
472 | break; | 472 | break; |
473 | } | 473 | } |
474 | 474 | ||
@@ -486,7 +486,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
486 | } | 486 | } |
487 | 487 | ||
488 | last++; | 488 | last++; |
489 | dataoff = last << blkbits; | 489 | dataoff = (loff_t)last << blkbits; |
490 | } while (last <= end); | 490 | } while (last <= end); |
491 | 491 | ||
492 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
@@ -540,7 +540,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
540 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 540 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
541 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 541 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
542 | last += ret; | 542 | last += ret; |
543 | holeoff = last << blkbits; | 543 | holeoff = (loff_t)last << blkbits; |
544 | continue; | 544 | continue; |
545 | } | 545 | } |
546 | 546 | ||
@@ -551,7 +551,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
551 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 551 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
552 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 552 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
553 | last = es.es_lblk + es.es_len; | 553 | last = es.es_lblk + es.es_len; |
554 | holeoff = last << blkbits; | 554 | holeoff = (loff_t)last << blkbits; |
555 | continue; | 555 | continue; |
556 | } | 556 | } |
557 | 557 | ||
@@ -566,7 +566,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
566 | &map, &holeoff); | 566 | &map, &holeoff); |
567 | if (!unwritten) { | 567 | if (!unwritten) { |
568 | last += ret; | 568 | last += ret; |
569 | holeoff = last << blkbits; | 569 | holeoff = (loff_t)last << blkbits; |
570 | continue; | 570 | continue; |
571 | } | 571 | } |
572 | } | 572 | } |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e0ba8a408def..a8bc47f75fa0 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -73,32 +73,6 @@ static int ext4_sync_parent(struct inode *inode) | |||
73 | return ret; | 73 | return ret; |
74 | } | 74 | } |
75 | 75 | ||
76 | /** | ||
77 | * __sync_file - generic_file_fsync without the locking and filemap_write | ||
78 | * @inode: inode to sync | ||
79 | * @datasync: only sync essential metadata if true | ||
80 | * | ||
81 | * This is just generic_file_fsync without the locking. This is needed for | ||
82 | * nojournal mode to make sure this inodes data/metadata makes it to disk | ||
83 | * properly. The i_mutex should be held already. | ||
84 | */ | ||
85 | static int __sync_inode(struct inode *inode, int datasync) | ||
86 | { | ||
87 | int err; | ||
88 | int ret; | ||
89 | |||
90 | ret = sync_mapping_buffers(inode->i_mapping); | ||
91 | if (!(inode->i_state & I_DIRTY)) | ||
92 | return ret; | ||
93 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
94 | return ret; | ||
95 | |||
96 | err = sync_inode_metadata(inode, 1); | ||
97 | if (ret == 0) | ||
98 | ret = err; | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | /* | 76 | /* |
103 | * akpm: A new design for ext4_sync_file(). | 77 | * akpm: A new design for ext4_sync_file(). |
104 | * | 78 | * |
@@ -116,7 +90,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
116 | struct inode *inode = file->f_mapping->host; | 90 | struct inode *inode = file->f_mapping->host; |
117 | struct ext4_inode_info *ei = EXT4_I(inode); | 91 | struct ext4_inode_info *ei = EXT4_I(inode); |
118 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 92 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
119 | int ret, err; | 93 | int ret = 0, err; |
120 | tid_t commit_tid; | 94 | tid_t commit_tid; |
121 | bool needs_barrier = false; | 95 | bool needs_barrier = false; |
122 | 96 | ||
@@ -124,25 +98,24 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
124 | 98 | ||
125 | trace_ext4_sync_file_enter(file, datasync); | 99 | trace_ext4_sync_file_enter(file, datasync); |
126 | 100 | ||
127 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 101 | if (inode->i_sb->s_flags & MS_RDONLY) { |
128 | if (ret) | 102 | /* Make sure that we read updated s_mount_flags value */ |
129 | return ret; | 103 | smp_rmb(); |
130 | mutex_lock(&inode->i_mutex); | 104 | if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) |
131 | 105 | ret = -EROFS; | |
132 | if (inode->i_sb->s_flags & MS_RDONLY) | ||
133 | goto out; | ||
134 | |||
135 | ret = ext4_flush_unwritten_io(inode); | ||
136 | if (ret < 0) | ||
137 | goto out; | 106 | goto out; |
107 | } | ||
138 | 108 | ||
139 | if (!journal) { | 109 | if (!journal) { |
140 | ret = __sync_inode(inode, datasync); | 110 | ret = generic_file_fsync(file, start, end, datasync); |
141 | if (!ret && !hlist_empty(&inode->i_dentry)) | 111 | if (!ret && !hlist_empty(&inode->i_dentry)) |
142 | ret = ext4_sync_parent(inode); | 112 | ret = ext4_sync_parent(inode); |
143 | goto out; | 113 | goto out; |
144 | } | 114 | } |
145 | 115 | ||
116 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
117 | if (ret) | ||
118 | return ret; | ||
146 | /* | 119 | /* |
147 | * data=writeback,ordered: | 120 | * data=writeback,ordered: |
148 | * The caller's filemap_fdatawrite()/wait will sync the data. | 121 | * The caller's filemap_fdatawrite()/wait will sync the data. |
@@ -172,8 +145,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
172 | if (!ret) | 145 | if (!ret) |
173 | ret = err; | 146 | ret = err; |
174 | } | 147 | } |
175 | out: | 148 | out: |
176 | mutex_unlock(&inode->i_mutex); | ||
177 | trace_ext4_sync_file_exit(inode, ret); | 149 | trace_ext4_sync_file_exit(inode, ret); |
178 | return ret; | 150 | return ret; |
179 | } | 151 | } |
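The new read-only early exit in ext4_sync_file() reads s_mount_flags behind an smp_rmb() so that, once MS_RDONLY is observed, the EXT4_MF_FS_ABORTED flag set on the abort side before the remount-ro is observed too, and a post-abort fsync() reports -EROFS instead of silently succeeding. In C11 terms this is release/acquire publication; a user-space analogue (hypothetical flags, not the kernel code):

	#include <stdio.h>
	#include <stdbool.h>
	#include <stdatomic.h>
	#include <pthread.h>

	static atomic_bool fs_aborted;	/* ~ EXT4_MF_FS_ABORTED */
	static atomic_bool sb_rdonly;	/* ~ MS_RDONLY */

	static void *abort_fs(void *arg)
	{
		/* Abort side: record the reason, then publish read-only. */
		atomic_store_explicit(&fs_aborted, true, memory_order_relaxed);
		atomic_store_explicit(&sb_rdonly, true, memory_order_release);
		return NULL;
	}

	static int fsync_path(void)
	{
		/* The acquire load plays the role of the smp_rmb() pairing. */
		if (atomic_load_explicit(&sb_rdonly, memory_order_acquire)) {
			if (atomic_load_explicit(&fs_aborted,
						 memory_order_relaxed))
				return -30;	/* -EROFS */
			return 0;	/* cleanly read-only: nothing to sync */
		}
		return 1;	/* read-write: do the real sync work */
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, abort_fs, NULL);
		pthread_join(t, NULL);
		printf("fsync result: %d\n", fsync_path());
		return 0;
	}

The ordering guarantees only that a reader who sees the second store also sees the first, which is exactly the property the comment "Make sure that we read updated s_mount_flags value" is asking for.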
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 00a818d67b54..f03598c6ffd3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -747,7 +747,8 @@ repeat_in_this_group: | |||
747 | if (!handle) { | 747 | if (!handle) { |
748 | BUG_ON(nblocks <= 0); | 748 | BUG_ON(nblocks <= 0); |
749 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, | 749 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, |
750 | handle_type, nblocks); | 750 | handle_type, nblocks, |
751 | 0); | ||
751 | if (IS_ERR(handle)) { | 752 | if (IS_ERR(handle)) { |
752 | err = PTR_ERR(handle); | 753 | err = PTR_ERR(handle); |
753 | ext4_std_error(sb, err); | 754 | ext4_std_error(sb, err); |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8d5d351e24f..87b30cd357e7 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -624,7 +624,7 @@ cleanup: | |||
624 | partial--; | 624 | partial--; |
625 | } | 625 | } |
626 | out: | 626 | out: |
627 | trace_ext4_ind_map_blocks_exit(inode, map, err); | 627 | trace_ext4_ind_map_blocks_exit(inode, flags, map, err); |
628 | return err; | 628 | return err; |
629 | } | 629 | } |
630 | 630 | ||
@@ -675,11 +675,6 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
675 | 675 | ||
676 | retry: | 676 | retry: |
677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { | 677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
678 | if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { | ||
679 | mutex_lock(&inode->i_mutex); | ||
680 | ext4_flush_unwritten_io(inode); | ||
681 | mutex_unlock(&inode->i_mutex); | ||
682 | } | ||
683 | /* | 678 | /* |
684 | * Nolock dioread optimization may be dynamically disabled | 679 | * Nolock dioread optimization may be dynamically disabled |
685 | * via ext4_inode_block_unlocked_dio(). Check inode's state | 680 | * via ext4_inode_block_unlocked_dio(). Check inode's state |
@@ -779,27 +774,18 @@ int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) | |||
779 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | 774 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; |
780 | } | 775 | } |
781 | 776 | ||
782 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 777 | /* |
778 | * Calculate number of indirect blocks touched by mapping @nrblocks logically | ||
779 | * contiguous blocks | ||
780 | */ | ||
781 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks) | ||
783 | { | 782 | { |
784 | int indirects; | ||
785 | |||
786 | /* if nrblocks are contiguous */ | ||
787 | if (chunk) { | ||
788 | /* | ||
789 | * With N contiguous data blocks, we need at most | ||
790 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | ||
791 | * 2 dindirect blocks, and 1 tindirect block | ||
792 | */ | ||
793 | return DIV_ROUND_UP(nrblocks, | ||
794 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | ||
795 | } | ||
796 | /* | 783 | /* |
797 | * if nrblocks are not contiguous, worst case each block touches | 784 | * With N contiguous data blocks, we need at most |
798 | * an indirect block, and each indirect block touches a double indirect | 785 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
799 | * block, plus a triple indirect block | 786 | * 2 dindirect blocks, and 1 tindirect block |
800 | */ | 787 | */ |
801 | indirects = nrblocks * 2 + 1; | 788 | return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
802 | return indirects; | ||
803 | } | 789 | } |
804 | 790 | ||
805 | /* | 791 | /* |
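The surviving formula is worth a worked example. Each indirect block holds EXT4_ADDR_PER_BLOCK block numbers (1024 with 4 KiB blocks and 4-byte entries), and per the comment the worst case is N/EXT4_ADDR_PER_BLOCK + 1 indirect blocks (the +1 covers a range straddling an indirect-block boundary), 2 dindirect blocks, and 1 tindirect block, hence the ceil-divide plus 4. A self-contained check (assuming 4 KiB blocks):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		int addr_per_block = 4096 / 4;	/* 1024 addresses per indirect block */

		/* 2048 contiguous blocks: 2 indirect blocks + 4 fixed = 6. */
		printf("%d\n", DIV_ROUND_UP(2048, addr_per_block) + 4);

		/* Even one block pays the fixed worst case: 1 + 4 = 5. */
		printf("%d\n", DIV_ROUND_UP(1, addr_per_block) + 4);
		return 0;
	}

So the credit estimate grows by one metadata block per 1024 data blocks plus a small constant, instead of the old 2 * nrblocks + 1 worst case for discontiguous ranges.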
@@ -940,11 +926,13 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
940 | __le32 *last) | 926 | __le32 *last) |
941 | { | 927 | { |
942 | __le32 *p; | 928 | __le32 *p; |
943 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | 929 | int flags = EXT4_FREE_BLOCKS_VALIDATED; |
944 | int err; | 930 | int err; |
945 | 931 | ||
946 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 932 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
947 | flags |= EXT4_FREE_BLOCKS_METADATA; | 933 | flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; |
934 | else if (ext4_should_journal_data(inode)) | ||
935 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
948 | 936 | ||
949 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | 937 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, |
950 | count)) { | 938 | count)) { |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1a346a6bdc8f..d9ecbf1113a7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -72,7 +72,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode, | |||
72 | entry = (struct ext4_xattr_entry *) | 72 | entry = (struct ext4_xattr_entry *) |
73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); | 73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); |
74 | 74 | ||
75 | free += le32_to_cpu(entry->e_value_size); | 75 | free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); |
76 | goto out; | 76 | goto out; |
77 | } | 77 | } |
78 | 78 | ||
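The inline-data accounting fix above is about padding: xattr values are stored rounded up to 4-byte granularity, so the space gained by reusing an entry is EXT4_XATTR_SIZE(e_value_size), not the raw value length. The rounding, reproduced stand-alone (macro values as defined in fs/ext4/xattr.h of this era, to the best of my reading):

	#include <stdio.h>

	/* Values are padded to EXT4_XATTR_PAD-byte granularity on disk. */
	#define EXT4_XATTR_PAD		4
	#define EXT4_XATTR_ROUND	(EXT4_XATTR_PAD - 1)
	#define EXT4_XATTR_SIZE(size)	(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)

	int main(void)
	{
		for (unsigned int len = 1; len <= 8; len++)
			printf("e_value_size %u -> %u bytes on disk\n",
			       len, EXT4_XATTR_SIZE(len));
		return 0;
	}

A 5-byte value occupies 8 bytes, so counting only le32_to_cpu(entry->e_value_size) understated the reusable space by up to 3 bytes per entry.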
@@ -1810,7 +1810,7 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
1810 | if (error) | 1810 | if (error) |
1811 | goto out; | 1811 | goto out; |
1812 | 1812 | ||
1813 | physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; | 1813 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; |
1814 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; | 1814 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; |
1815 | physical += offsetof(struct ext4_inode, i_block); | 1815 | physical += offsetof(struct ext4_inode, i_block); |
1816 | length = i_size_read(inode); | 1816 | length = i_size_read(inode); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6382b89ecbd..0188e65e1f58 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -132,12 +132,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
132 | new_size); | 132 | new_size); |
133 | } | 133 | } |
134 | 134 | ||
135 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 135 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
136 | unsigned int length); | ||
136 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 137 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
137 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 138 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
138 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 139 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
139 | struct inode *inode, struct page *page, loff_t from, | 140 | int pextents); |
140 | loff_t length, int flags); | ||
141 | 141 | ||
142 | /* | 142 | /* |
143 | * Test whether an inode is a fast symlink. | 143 | * Test whether an inode is a fast symlink. |
@@ -215,7 +215,8 @@ void ext4_evict_inode(struct inode *inode) | |||
215 | filemap_write_and_wait(&inode->i_data); | 215 | filemap_write_and_wait(&inode->i_data); |
216 | } | 216 | } |
217 | truncate_inode_pages(&inode->i_data, 0); | 217 | truncate_inode_pages(&inode->i_data, 0); |
218 | ext4_ioend_shutdown(inode); | 218 | |
219 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
219 | goto no_delete; | 220 | goto no_delete; |
220 | } | 221 | } |
221 | 222 | ||
@@ -225,8 +226,8 @@ void ext4_evict_inode(struct inode *inode) | |||
225 | if (ext4_should_order_data(inode)) | 226 | if (ext4_should_order_data(inode)) |
226 | ext4_begin_ordered_truncate(inode, 0); | 227 | ext4_begin_ordered_truncate(inode, 0); |
227 | truncate_inode_pages(&inode->i_data, 0); | 228 | truncate_inode_pages(&inode->i_data, 0); |
228 | ext4_ioend_shutdown(inode); | ||
229 | 229 | ||
230 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
230 | if (is_bad_inode(inode)) | 231 | if (is_bad_inode(inode)) |
231 | goto no_delete; | 232 | goto no_delete; |
232 | 233 | ||
@@ -423,66 +424,6 @@ static int __check_block_validity(struct inode *inode, const char *func, | |||
423 | #define check_block_validity(inode, map) \ | 424 | #define check_block_validity(inode, map) \ |
424 | __check_block_validity((inode), __func__, __LINE__, (map)) | 425 | __check_block_validity((inode), __func__, __LINE__, (map)) |
425 | 426 | ||
426 | /* | ||
427 | * Return the number of contiguous dirty pages in a given inode | ||
428 | * starting at page frame idx. | ||
429 | */ | ||
430 | static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | ||
431 | unsigned int max_pages) | ||
432 | { | ||
433 | struct address_space *mapping = inode->i_mapping; | ||
434 | pgoff_t index; | ||
435 | struct pagevec pvec; | ||
436 | pgoff_t num = 0; | ||
437 | int i, nr_pages, done = 0; | ||
438 | |||
439 | if (max_pages == 0) | ||
440 | return 0; | ||
441 | pagevec_init(&pvec, 0); | ||
442 | while (!done) { | ||
443 | index = idx; | ||
444 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
445 | PAGECACHE_TAG_DIRTY, | ||
446 | (pgoff_t)PAGEVEC_SIZE); | ||
447 | if (nr_pages == 0) | ||
448 | break; | ||
449 | for (i = 0; i < nr_pages; i++) { | ||
450 | struct page *page = pvec.pages[i]; | ||
451 | struct buffer_head *bh, *head; | ||
452 | |||
453 | lock_page(page); | ||
454 | if (unlikely(page->mapping != mapping) || | ||
455 | !PageDirty(page) || | ||
456 | PageWriteback(page) || | ||
457 | page->index != idx) { | ||
458 | done = 1; | ||
459 | unlock_page(page); | ||
460 | break; | ||
461 | } | ||
462 | if (page_has_buffers(page)) { | ||
463 | bh = head = page_buffers(page); | ||
464 | do { | ||
465 | if (!buffer_delay(bh) && | ||
466 | !buffer_unwritten(bh)) | ||
467 | done = 1; | ||
468 | bh = bh->b_this_page; | ||
469 | } while (!done && (bh != head)); | ||
470 | } | ||
471 | unlock_page(page); | ||
472 | if (done) | ||
473 | break; | ||
474 | idx++; | ||
475 | num++; | ||
476 | if (num >= max_pages) { | ||
477 | done = 1; | ||
478 | break; | ||
479 | } | ||
480 | } | ||
481 | pagevec_release(&pvec); | ||
482 | } | ||
483 | return num; | ||
484 | } | ||
485 | |||
486 | #ifdef ES_AGGRESSIVE_TEST | 427 | #ifdef ES_AGGRESSIVE_TEST |
487 | static void ext4_map_blocks_es_recheck(handle_t *handle, | 428 | static void ext4_map_blocks_es_recheck(handle_t *handle, |
488 | struct inode *inode, | 429 | struct inode *inode, |
@@ -573,6 +514,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
573 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | 514 | "logical block %lu\n", inode->i_ino, flags, map->m_len, |
574 | (unsigned long) map->m_lblk); | 515 | (unsigned long) map->m_lblk); |
575 | 516 | ||
517 | ext4_es_lru_add(inode); | ||
518 | |||
576 | /* Lookup extent status tree firstly */ | 519 | /* Lookup extent status tree firstly */ |
577 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 520 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
578 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 521 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
@@ -1118,10 +1061,13 @@ static int ext4_write_end(struct file *file, | |||
1118 | } | 1061 | } |
1119 | } | 1062 | } |
1120 | 1063 | ||
1121 | if (ext4_has_inline_data(inode)) | 1064 | if (ext4_has_inline_data(inode)) { |
1122 | copied = ext4_write_inline_data_end(inode, pos, len, | 1065 | ret = ext4_write_inline_data_end(inode, pos, len, |
1123 | copied, page); | 1066 | copied, page); |
1124 | else | 1067 | if (ret < 0) |
1068 | goto errout; | ||
1069 | copied = ret; | ||
1070 | } else | ||
1125 | copied = block_write_end(file, mapping, pos, | 1071 | copied = block_write_end(file, mapping, pos, |
1126 | len, copied, page, fsdata); | 1072 | len, copied, page, fsdata); |
1127 | 1073 | ||
@@ -1157,8 +1103,6 @@ static int ext4_write_end(struct file *file, | |||
1157 | if (i_size_changed) | 1103 | if (i_size_changed) |
1158 | ext4_mark_inode_dirty(handle, inode); | 1104 | ext4_mark_inode_dirty(handle, inode); |
1159 | 1105 | ||
1160 | if (copied < 0) | ||
1161 | ret = copied; | ||
1162 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 1106 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1163 | /* if we have allocated more blocks and copied | 1107 | /* if we have allocated more blocks and copied |
1164 | * less. We will have blocks allocated outside | 1108 | * less. We will have blocks allocated outside |
@@ -1415,21 +1359,28 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1415 | } | 1359 | } |
1416 | 1360 | ||
1417 | static void ext4_da_page_release_reservation(struct page *page, | 1361 | static void ext4_da_page_release_reservation(struct page *page, |
1418 | unsigned long offset) | 1362 | unsigned int offset, |
1363 | unsigned int length) | ||
1419 | { | 1364 | { |
1420 | int to_release = 0; | 1365 | int to_release = 0; |
1421 | struct buffer_head *head, *bh; | 1366 | struct buffer_head *head, *bh; |
1422 | unsigned int curr_off = 0; | 1367 | unsigned int curr_off = 0; |
1423 | struct inode *inode = page->mapping->host; | 1368 | struct inode *inode = page->mapping->host; |
1424 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1369 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1370 | unsigned int stop = offset + length; | ||
1425 | int num_clusters; | 1371 | int num_clusters; |
1426 | ext4_fsblk_t lblk; | 1372 | ext4_fsblk_t lblk; |
1427 | 1373 | ||
1374 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
1375 | |||
1428 | head = page_buffers(page); | 1376 | head = page_buffers(page); |
1429 | bh = head; | 1377 | bh = head; |
1430 | do { | 1378 | do { |
1431 | unsigned int next_off = curr_off + bh->b_size; | 1379 | unsigned int next_off = curr_off + bh->b_size; |
1432 | 1380 | ||
1381 | if (next_off > stop) | ||
1382 | break; | ||
1383 | |||
1433 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1384 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1434 | to_release++; | 1385 | to_release++; |
1435 | clear_buffer_delay(bh); | 1386 | clear_buffer_delay(bh); |
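With invalidatepage now taking an (offset, length) pair, the walk above releases reservations only for buffers wholly inside the invalidated byte range: stop = offset + length, and the loop breaks at the first buffer that would extend past it. A small model of the window arithmetic, four 1 KiB buffers in a 4 KiB page with the middle 2 KiB invalidated (hypothetical values):

	#include <stdio.h>

	int main(void)
	{
		unsigned int bh_size = 1024, page_size = 4096;
		unsigned int offset = 1024, length = 2048;	/* bytes 1024..3071 */
		unsigned int stop = offset + length;
		unsigned int curr_off = 0;

		for (unsigned int bh = 0; curr_off < page_size; bh++) {
			unsigned int next_off = curr_off + bh_size;

			if (next_off > stop)	/* buffer pokes past the range */
				break;
			if (offset <= curr_off)	/* buffer starts inside it */
				printf("release buffer %u (bytes %u..%u)\n",
				       bh, curr_off, next_off - 1);
			curr_off = next_off;
		}
		return 0;
	}

Only buffers 1 and 2 are released; buffer 0 precedes the range and buffer 3 crosses stop, matching the delayed-reservation accounting the kernel loop performs (with its buffer_delay() checks added).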
@@ -1460,140 +1411,43 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1460 | * Delayed allocation stuff | 1411 | * Delayed allocation stuff |
1461 | */ | 1412 | */ |
1462 | 1413 | ||
1463 | /* | 1414 | struct mpage_da_data { |
1464 | * mpage_da_submit_io - walks through extent of pages and tries to write | 1415 | struct inode *inode; |
1465 | * them with the writepage() callback | 1416 | struct writeback_control *wbc; |
1466 | * | ||
1467 | * @mpd->inode: inode | ||
1468 | * @mpd->first_page: first page of the extent | ||
1469 | * @mpd->next_page: page after the last page of the extent | ||
1470 | * | ||
1471 | * By the time mpage_da_submit_io() is called we expect all blocks | ||
1472 | * to be allocated. This may be wrong if allocation failed. | ||
1473 | * | ||
1474 | * As pages are already locked by write_cache_pages(), we can't use it | ||
1475 | */ | ||
1476 | static int mpage_da_submit_io(struct mpage_da_data *mpd, | ||
1477 | struct ext4_map_blocks *map) | ||
1478 | { | ||
1479 | struct pagevec pvec; | ||
1480 | unsigned long index, end; | ||
1481 | int ret = 0, err, nr_pages, i; | ||
1482 | struct inode *inode = mpd->inode; | ||
1483 | struct address_space *mapping = inode->i_mapping; | ||
1484 | loff_t size = i_size_read(inode); | ||
1485 | unsigned int len, block_start; | ||
1486 | struct buffer_head *bh, *page_bufs = NULL; | ||
1487 | sector_t pblock = 0, cur_logical = 0; | ||
1488 | struct ext4_io_submit io_submit; | ||
1489 | 1417 | ||
1490 | BUG_ON(mpd->next_page <= mpd->first_page); | 1418 | pgoff_t first_page; /* The first page to write */ |
1491 | memset(&io_submit, 0, sizeof(io_submit)); | 1419 | pgoff_t next_page; /* Current page to examine */ |
1420 | pgoff_t last_page; /* Last page to examine */ | ||
1492 | /* | 1421 | /* |
1493 | * We need to start from the first_page to the next_page - 1 | 1422 | * Extent to map - this can be after first_page because that can be |
1494 | * to make sure we also write the mapped dirty buffer_heads. | 1423 | * fully mapped. We somewhat abuse m_flags to store whether the extent |
1495 | * If we look at mpd->b_blocknr we would only be looking | 1424 | * is delalloc or unwritten. |
1496 | * at the currently mapped buffer_heads. | ||
1497 | */ | 1425 | */ |
1498 | index = mpd->first_page; | 1426 | struct ext4_map_blocks map; |
1499 | end = mpd->next_page - 1; | 1427 | struct ext4_io_submit io_submit; /* IO submission data */ |
1500 | 1428 | }; | |
1501 | pagevec_init(&pvec, 0); | ||
1502 | while (index <= end) { | ||
1503 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1504 | if (nr_pages == 0) | ||
1505 | break; | ||
1506 | for (i = 0; i < nr_pages; i++) { | ||
1507 | int skip_page = 0; | ||
1508 | struct page *page = pvec.pages[i]; | ||
1509 | |||
1510 | index = page->index; | ||
1511 | if (index > end) | ||
1512 | break; | ||
1513 | |||
1514 | if (index == size >> PAGE_CACHE_SHIFT) | ||
1515 | len = size & ~PAGE_CACHE_MASK; | ||
1516 | else | ||
1517 | len = PAGE_CACHE_SIZE; | ||
1518 | if (map) { | ||
1519 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
1520 | inode->i_blkbits); | ||
1521 | pblock = map->m_pblk + (cur_logical - | ||
1522 | map->m_lblk); | ||
1523 | } | ||
1524 | index++; | ||
1525 | |||
1526 | BUG_ON(!PageLocked(page)); | ||
1527 | BUG_ON(PageWriteback(page)); | ||
1528 | |||
1529 | bh = page_bufs = page_buffers(page); | ||
1530 | block_start = 0; | ||
1531 | do { | ||
1532 | if (map && (cur_logical >= map->m_lblk) && | ||
1533 | (cur_logical <= (map->m_lblk + | ||
1534 | (map->m_len - 1)))) { | ||
1535 | if (buffer_delay(bh)) { | ||
1536 | clear_buffer_delay(bh); | ||
1537 | bh->b_blocknr = pblock; | ||
1538 | } | ||
1539 | if (buffer_unwritten(bh) || | ||
1540 | buffer_mapped(bh)) | ||
1541 | BUG_ON(bh->b_blocknr != pblock); | ||
1542 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
1543 | set_buffer_uninit(bh); | ||
1544 | clear_buffer_unwritten(bh); | ||
1545 | } | ||
1546 | |||
1547 | /* | ||
1548 | * skip page if block allocation undone and | ||
1549 | * block is dirty | ||
1550 | */ | ||
1551 | if (ext4_bh_delay_or_unwritten(NULL, bh)) | ||
1552 | skip_page = 1; | ||
1553 | bh = bh->b_this_page; | ||
1554 | block_start += bh->b_size; | ||
1555 | cur_logical++; | ||
1556 | pblock++; | ||
1557 | } while (bh != page_bufs); | ||
1558 | |||
1559 | if (skip_page) { | ||
1560 | unlock_page(page); | ||
1561 | continue; | ||
1562 | } | ||
1563 | |||
1564 | clear_page_dirty_for_io(page); | ||
1565 | err = ext4_bio_write_page(&io_submit, page, len, | ||
1566 | mpd->wbc); | ||
1567 | if (!err) | ||
1568 | mpd->pages_written++; | ||
1569 | /* | ||
1570 | * In error case, we have to continue because | ||
1571 | * remaining pages are still locked | ||
1572 | */ | ||
1573 | if (ret == 0) | ||
1574 | ret = err; | ||
1575 | } | ||
1576 | pagevec_release(&pvec); | ||
1577 | } | ||
1578 | ext4_io_submit(&io_submit); | ||
1579 | return ret; | ||
1580 | } | ||
1581 | 1429 | ||
1582 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | 1430 | static void mpage_release_unused_pages(struct mpage_da_data *mpd, |
1431 | bool invalidate) | ||
1583 | { | 1432 | { |
1584 | int nr_pages, i; | 1433 | int nr_pages, i; |
1585 | pgoff_t index, end; | 1434 | pgoff_t index, end; |
1586 | struct pagevec pvec; | 1435 | struct pagevec pvec; |
1587 | struct inode *inode = mpd->inode; | 1436 | struct inode *inode = mpd->inode; |
1588 | struct address_space *mapping = inode->i_mapping; | 1437 | struct address_space *mapping = inode->i_mapping; |
1589 | ext4_lblk_t start, last; | 1438 | |
1439 | /* This is necessary when next_page == 0. */ | ||
1440 | if (mpd->first_page >= mpd->next_page) | ||
1441 | return; | ||
1590 | 1442 | ||
1591 | index = mpd->first_page; | 1443 | index = mpd->first_page; |
1592 | end = mpd->next_page - 1; | 1444 | end = mpd->next_page - 1; |
1593 | 1445 | if (invalidate) { | |
1594 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1446 | ext4_lblk_t start, last; |
1595 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1447 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1596 | ext4_es_remove_extent(inode, start, last - start + 1); | 1448 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1449 | ext4_es_remove_extent(inode, start, last - start + 1); | ||
1450 | } | ||
1597 | 1451 | ||
1598 | pagevec_init(&pvec, 0); | 1452 | pagevec_init(&pvec, 0); |
1599 | while (index <= end) { | 1453 | while (index <= end) { |
@@ -1606,14 +1460,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |||
1606 | break; | 1460 | break; |
1607 | BUG_ON(!PageLocked(page)); | 1461 | BUG_ON(!PageLocked(page)); |
1608 | BUG_ON(PageWriteback(page)); | 1462 | BUG_ON(PageWriteback(page)); |
1609 | block_invalidatepage(page, 0); | 1463 | if (invalidate) { |
1610 | ClearPageUptodate(page); | 1464 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
1465 | ClearPageUptodate(page); | ||
1466 | } | ||
1611 | unlock_page(page); | 1467 | unlock_page(page); |
1612 | } | 1468 | } |
1613 | index = pvec.pages[nr_pages - 1]->index + 1; | 1469 | index = pvec.pages[nr_pages - 1]->index + 1; |
1614 | pagevec_release(&pvec); | 1470 | pagevec_release(&pvec); |
1615 | } | 1471 | } |
1616 | return; | ||
1617 | } | 1472 | } |
1618 | 1473 | ||
1619 | static void ext4_print_free_blocks(struct inode *inode) | 1474 | static void ext4_print_free_blocks(struct inode *inode) |
@@ -1642,215 +1497,6 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1642 | return; | 1497 | return; |
1643 | } | 1498 | } |
1644 | 1499 | ||
1645 | /* | ||
1646 | * mpage_da_map_and_submit - go through given space, map them | ||
1647 | * if necessary, and then submit them for I/O | ||
1648 | * | ||
1649 | * @mpd - bh describing space | ||
1650 | * | ||
1651 | * The function skips space we know is already mapped to disk blocks. | ||
1652 | * | ||
1653 | */ | ||
1654 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | ||
1655 | { | ||
1656 | int err, blks, get_blocks_flags; | ||
1657 | struct ext4_map_blocks map, *mapp = NULL; | ||
1658 | sector_t next = mpd->b_blocknr; | ||
1659 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
1660 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
1661 | handle_t *handle = NULL; | ||
1662 | |||
1663 | /* | ||
1664 | * If the blocks are mapped already, or we couldn't accumulate | ||
1665 | * any blocks, then proceed immediately to the submission stage. | ||
1666 | */ | ||
1667 | if ((mpd->b_size == 0) || | ||
1668 | ((mpd->b_state & (1 << BH_Mapped)) && | ||
1669 | !(mpd->b_state & (1 << BH_Delay)) && | ||
1670 | !(mpd->b_state & (1 << BH_Unwritten)))) | ||
1671 | goto submit_io; | ||
1672 | |||
1673 | handle = ext4_journal_current_handle(); | ||
1674 | BUG_ON(!handle); | ||
1675 | |||
1676 | /* | ||
1677 | * Call ext4_map_blocks() to allocate any delayed allocation | ||
1678 | * blocks, or to convert an uninitialized extent to be | ||
1679 | * initialized (in the case where we have written into | ||
1680 | * one or more preallocated blocks). | ||
1681 | * | ||
1682 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
1683 | * indicate that we are on the delayed allocation path. This | ||
1684 | * affects functions in many different parts of the allocation | ||
1685 | * call path. This flag exists primarily because we don't | ||
1686 | * want to change *many* call functions, so ext4_map_blocks() | ||
1687 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the | ||
1688 | * inode's allocation semaphore is taken. | ||
1689 | * | ||
1690 | * If the blocks in questions were delalloc blocks, set | ||
1691 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | ||
1692 | * variables are updated after the blocks have been allocated. | ||
1693 | */ | ||
1694 | map.m_lblk = next; | ||
1695 | map.m_len = max_blocks; | ||
1696 | /* | ||
1697 | * We're in delalloc path and it is possible that we're going to | ||
1698 | * need more metadata blocks than previously reserved. However | ||
1699 | * we must not fail because we're in writeback and there is | ||
1700 | * nothing we can do about it so it might result in data loss. | ||
1701 | * So use reserved blocks to allocate metadata if possible. | ||
1702 | */ | ||
1703 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | ||
1704 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | ||
1705 | if (ext4_should_dioread_nolock(mpd->inode)) | ||
1706 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
1707 | if (mpd->b_state & (1 << BH_Delay)) | ||
1708 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
1709 | |||
1710 | |||
1711 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | ||
1712 | if (blks < 0) { | ||
1713 | struct super_block *sb = mpd->inode->i_sb; | ||
1714 | |||
1715 | err = blks; | ||
1716 | /* | ||
1717 | * If get block returns EAGAIN or ENOSPC and there | ||
1718 | * appears to be free blocks we will just let | ||
1719 | * mpage_da_submit_io() unlock all of the pages. | ||
1720 | */ | ||
1721 | if (err == -EAGAIN) | ||
1722 | goto submit_io; | ||
1723 | |||
1724 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { | ||
1725 | mpd->retval = err; | ||
1726 | goto submit_io; | ||
1727 | } | ||
1728 | |||
1729 | /* | ||
1730 | * get block failure will cause us to loop in | ||
1731 | * writepages, because a_ops->writepage won't be able | ||
1732 | * to make progress. The page will be redirtied by | ||
1733 | * writepage and writepages will again try to write | ||
1734 | * the same. | ||
1735 | */ | ||
1736 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { | ||
1737 | ext4_msg(sb, KERN_CRIT, | ||
1738 | "delayed block allocation failed for inode %lu " | ||
1739 | "at logical offset %llu with max blocks %zd " | ||
1740 | "with error %d", mpd->inode->i_ino, | ||
1741 | (unsigned long long) next, | ||
1742 | mpd->b_size >> mpd->inode->i_blkbits, err); | ||
1743 | ext4_msg(sb, KERN_CRIT, | ||
1744 | "This should not happen!! Data will be lost"); | ||
1745 | if (err == -ENOSPC) | ||
1746 | ext4_print_free_blocks(mpd->inode); | ||
1747 | } | ||
1748 | /* invalidate all the pages */ | ||
1749 | ext4_da_block_invalidatepages(mpd); | ||
1750 | |||
1751 | /* Mark this page range as having been completed */ | ||
1752 | mpd->io_done = 1; | ||
1753 | return; | ||
1754 | } | ||
1755 | BUG_ON(blks == 0); | ||
1756 | |||
1757 | mapp = ↦ | ||
1758 | if (map.m_flags & EXT4_MAP_NEW) { | ||
1759 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
1760 | int i; | ||
1761 | |||
1762 | for (i = 0; i < map.m_len; i++) | ||
1763 | unmap_underlying_metadata(bdev, map.m_pblk + i); | ||
1764 | } | ||
1765 | |||
1766 | /* | ||
1767 | * Update on-disk size along with block allocation. | ||
1768 | */ | ||
1769 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
1770 | if (disksize > i_size_read(mpd->inode)) | ||
1771 | disksize = i_size_read(mpd->inode); | ||
1772 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
1773 | ext4_update_i_disksize(mpd->inode, disksize); | ||
1774 | err = ext4_mark_inode_dirty(handle, mpd->inode); | ||
1775 | if (err) | ||
1776 | ext4_error(mpd->inode->i_sb, | ||
1777 | "Failed to mark inode %lu dirty", | ||
1778 | mpd->inode->i_ino); | ||
1779 | } | ||
1780 | |||
1781 | submit_io: | ||
1782 | mpage_da_submit_io(mpd, mapp); | ||
1783 | mpd->io_done = 1; | ||
1784 | } | ||
1785 | |||
1786 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | ||
1787 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1788 | |||
1789 | /* | ||
1790 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||
1791 | * | ||
1792 | * @mpd->lbh - extent of blocks | ||
1793 | * @logical - logical number of the block in the file | ||
1794 | * @b_state - b_state of the buffer head added | ||
1795 | * | ||
1796 | * the function is used to collect contiguous blocks in the same state | ||
1797 | */ | ||
1798 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical, | ||
1799 | unsigned long b_state) | ||
1800 | { | ||
1801 | sector_t next; | ||
1802 | int blkbits = mpd->inode->i_blkbits; | ||
1803 | int nrblocks = mpd->b_size >> blkbits; | ||
1804 | |||
1805 | /* | ||
1806 | * XXX Don't go larger than mballoc is willing to allocate | ||
1807 | * This is a stopgap solution. We eventually need to fold | ||
1808 | * mpage_da_submit_io() into this function and then call | ||
1809 | * ext4_map_blocks() multiple times in a loop | ||
1810 | */ | ||
1811 | if (nrblocks >= (8*1024*1024 >> blkbits)) | ||
1812 | goto flush_it; | ||
1813 | |||
1814 | /* check if the reserved journal credits might overflow */ | ||
1815 | if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) { | ||
1816 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1817 | /* | ||
1818 | * With non-extent format we are limited by the journal | ||
1819 | * credit available. Total credit needed to insert | ||
1820 | * nrblocks contiguous blocks is dependent on the | ||
1821 | * nrblocks. So limit nrblocks. | ||
1822 | */ | ||
1823 | goto flush_it; | ||
1824 | } | ||
1825 | } | ||
1826 | /* | ||
1827 | * First block in the extent | ||
1828 | */ | ||
1829 | if (mpd->b_size == 0) { | ||
1830 | mpd->b_blocknr = logical; | ||
1831 | mpd->b_size = 1 << blkbits; | ||
1832 | mpd->b_state = b_state & BH_FLAGS; | ||
1833 | return; | ||
1834 | } | ||
1835 | |||
1836 | next = mpd->b_blocknr + nrblocks; | ||
1837 | /* | ||
1838 | * Can we merge the block to our big extent? | ||
1839 | */ | ||
1840 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { | ||
1841 | mpd->b_size += 1 << blkbits; | ||
1842 | return; | ||
1843 | } | ||
1844 | |||
1845 | flush_it: | ||
1846 | /* | ||
1847 | * We couldn't merge the block to our extent, so we | ||
1848 | * need to flush current extent and start new one | ||
1849 | */ | ||
1850 | mpage_da_map_and_submit(mpd); | ||
1851 | return; | ||
1852 | } | ||
1853 | |||
1854 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | 1500 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
1855 | { | 1501 | { |
1856 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); | 1502 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
@@ -1883,6 +1529,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1883 | "logical block %lu\n", inode->i_ino, map->m_len, | 1529 | "logical block %lu\n", inode->i_ino, map->m_len, |
1884 | (unsigned long) map->m_lblk); | 1530 | (unsigned long) map->m_lblk); |
1885 | 1531 | ||
1532 | ext4_es_lru_add(inode); | ||
1533 | |||
1886 | /* Lookup extent status tree firstly */ | 1534 | /* Lookup extent status tree firstly */ |
1887 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1535 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
1888 | 1536 | ||
@@ -2156,7 +1804,7 @@ out: | |||
2156 | * lock so we have to do some magic. | 1804 | * lock so we have to do some magic. |
2157 | * | 1805 | * |
2158 | * This function can get called via... | 1806 | * This function can get called via... |
2159 | * - ext4_da_writepages after taking page lock (have journal handle) | 1807 | * - ext4_writepages after taking page lock (have journal handle) |
2160 | * - journal_submit_inode_data_buffers (no journal handle) | 1808 | * - journal_submit_inode_data_buffers (no journal handle) |
2161 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) | 1809 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) |
2162 | * - grab_page_cache when doing write_begin (have journal handle) | 1810 | * - grab_page_cache when doing write_begin (have journal handle) |
@@ -2234,76 +1882,405 @@ static int ext4_writepage(struct page *page, | |||
2234 | */ | 1882 | */ |
2235 | return __ext4_journalled_writepage(page, len); | 1883 | return __ext4_journalled_writepage(page, len); |
2236 | 1884 | ||
2237 | memset(&io_submit, 0, sizeof(io_submit)); | 1885 | ext4_io_submit_init(&io_submit, wbc); |
1886 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
1887 | if (!io_submit.io_end) { | ||
1888 | redirty_page_for_writepage(wbc, page); | ||
1889 | unlock_page(page); | ||
1890 | return -ENOMEM; | ||
1891 | } | ||
2238 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); | 1892 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); |
2239 | ext4_io_submit(&io_submit); | 1893 | ext4_io_submit(&io_submit); |
1894 | /* Drop io_end reference we got from init */ | ||
1895 | ext4_put_io_end_defer(io_submit.io_end); | ||
2240 | return ret; | 1896 | return ret; |
2241 | } | 1897 | } |
2242 | 1898 | ||
1899 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) | ||
1900 | |||
2243 | /* | 1901 | /* |
2244 | * This is called via ext4_da_writepages() to | 1902 | * mballoc gives us at most this number of blocks... |
2245 | * calculate the total number of credits to reserve to fit | 1903 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). |
2246 | * a single extent allocation into a single transaction, | 1904 | * The rest of mballoc seems to handle chunks up to full group size. |
2247 | * ext4_da_writepages() will loop calling this before | ||
2248 | * the block allocation. | ||
2249 | */ | 1905 | */ |
1906 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 | ||
2250 | 1907 | ||
2251 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | 1908 | /* |
1909 | * mpage_add_bh_to_extent - try to add bh to extent of blocks to map | ||
1910 | * | ||
1911 | * @mpd - extent of blocks | ||
1912 | * @lblk - logical number of the block in the file | ||
1913 | * @b_state - b_state of the buffer head added | ||
1914 | * | ||
1915 | * the function is used to collect contiguous blocks in the same state | ||
1916 | */ | ||
1917 | static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, | ||
1918 | unsigned long b_state) | ||
1919 | { | ||
1920 | struct ext4_map_blocks *map = &mpd->map; | ||
1921 | |||
1922 | /* Don't go larger than mballoc is willing to allocate */ | ||
1923 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | ||
1924 | return 0; | ||
1925 | |||
1926 | /* First block in the extent? */ | ||
1927 | if (map->m_len == 0) { | ||
1928 | map->m_lblk = lblk; | ||
1929 | map->m_len = 1; | ||
1930 | map->m_flags = b_state & BH_FLAGS; | ||
1931 | return 1; | ||
1932 | } | ||
1933 | |||
1934 | /* Can we merge the block to our big extent? */ | ||
1935 | if (lblk == map->m_lblk + map->m_len && | ||
1936 | (b_state & BH_FLAGS) == map->m_flags) { | ||
1937 | map->m_len++; | ||
1938 | return 1; | ||
1939 | } | ||
1940 | return 0; | ||
1941 | } | ||
1942 | |||
1943 | static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, | ||
1944 | struct buffer_head *head, | ||
1945 | struct buffer_head *bh, | ||
1946 | ext4_lblk_t lblk) | ||
1947 | { | ||
1948 | struct inode *inode = mpd->inode; | ||
1949 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
1950 | >> inode->i_blkbits; | ||
1951 | |||
1952 | do { | ||
1953 | BUG_ON(buffer_locked(bh)); | ||
1954 | |||
1955 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || | ||
1956 | (!buffer_delay(bh) && !buffer_unwritten(bh)) || | ||
1957 | lblk >= blocks) { | ||
1958 | /* Found extent to map? */ | ||
1959 | if (mpd->map.m_len) | ||
1960 | return false; | ||
1961 | if (lblk >= blocks) | ||
1962 | return true; | ||
1963 | continue; | ||
1964 | } | ||
1965 | if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state)) | ||
1966 | return false; | ||
1967 | } while (lblk++, (bh = bh->b_this_page) != head); | ||
1968 | return true; | ||
1969 | } | ||
1970 | |||
1971 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | ||
2252 | { | 1972 | { |
2253 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 1973 | int len; |
1974 | loff_t size = i_size_read(mpd->inode); | ||
1975 | int err; | ||
1976 | |||
1977 | BUG_ON(page->index != mpd->first_page); | ||
1978 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
1979 | len = size & ~PAGE_CACHE_MASK; | ||
1980 | else | ||
1981 | len = PAGE_CACHE_SIZE; | ||
1982 | clear_page_dirty_for_io(page); | ||
1983 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); | ||
1984 | if (!err) | ||
1985 | mpd->wbc->nr_to_write--; | ||
1986 | mpd->first_page++; | ||
2254 | 1987 | ||
1988 | return err; | ||
1989 | } | ||
1990 | |||
1991 | /* | ||
1992 | * mpage_map_and_submit_buffers - update buffers corresponding to changed extent and | ||
1993 | * submit fully mapped pages for IO | ||
1994 | * | ||
1995 | * @mpd - description of extent to map, on return next extent to map | ||
1996 | * | ||
1997 | * Scan buffers corresponding to changed extent (we expect corresponding pages | ||
1998 | * to be already locked) and update buffer state according to new extent state. | ||
1999 | * We map delalloc buffers to their physical location, clear unwritten bits, | ||
2000 | * and mark buffers as uninit when we perform writes to uninitialized extents | ||
2001 | * and do extent conversion after IO is finished. If the last page is not fully | ||
2002 | * mapped, we update @map to the next extent in the last page that needs | ||
2003 | * mapping. Otherwise we submit the page for IO. | ||
2004 | */ | ||
2005 | static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | ||
2006 | { | ||
2007 | struct pagevec pvec; | ||
2008 | int nr_pages, i; | ||
2009 | struct inode *inode = mpd->inode; | ||
2010 | struct buffer_head *head, *bh; | ||
2011 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; | ||
2012 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
2013 | >> inode->i_blkbits; | ||
2014 | pgoff_t start, end; | ||
2015 | ext4_lblk_t lblk; | ||
2016 | sector_t pblock; | ||
2017 | int err; | ||
2018 | |||
2019 | start = mpd->map.m_lblk >> bpp_bits; | ||
2020 | end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; | ||
2021 | lblk = start << bpp_bits; | ||
2022 | pblock = mpd->map.m_pblk; | ||
2023 | |||
2024 | pagevec_init(&pvec, 0); | ||
2025 | while (start <= end) { | ||
2026 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start, | ||
2027 | PAGEVEC_SIZE); | ||
2028 | if (nr_pages == 0) | ||
2029 | break; | ||
2030 | for (i = 0; i < nr_pages; i++) { | ||
2031 | struct page *page = pvec.pages[i]; | ||
2032 | |||
2033 | if (page->index > end) | ||
2034 | break; | ||
2035 | /* Up to 'end' pages must be contiguous */ | ||
2036 | BUG_ON(page->index != start); | ||
2037 | bh = head = page_buffers(page); | ||
2038 | do { | ||
2039 | if (lblk < mpd->map.m_lblk) | ||
2040 | continue; | ||
2041 | if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { | ||
2042 | /* | ||
2043 | * Buffer after end of mapped extent. | ||
2044 | * Find next buffer in the page to map. | ||
2045 | */ | ||
2046 | mpd->map.m_len = 0; | ||
2047 | mpd->map.m_flags = 0; | ||
2048 | add_page_bufs_to_extent(mpd, head, bh, | ||
2049 | lblk); | ||
2050 | pagevec_release(&pvec); | ||
2051 | return 0; | ||
2052 | } | ||
2053 | if (buffer_delay(bh)) { | ||
2054 | clear_buffer_delay(bh); | ||
2055 | bh->b_blocknr = pblock++; | ||
2056 | } | ||
2057 | clear_buffer_unwritten(bh); | ||
2058 | } while (++lblk < blocks && | ||
2059 | (bh = bh->b_this_page) != head); | ||
2060 | |||
2061 | /* | ||
2062 | * FIXME: This is going to break if dioread_nolock | ||
2063 | * supports blocksize < pagesize as we will try to | ||
2064 | * convert potentially unmapped parts of inode. | ||
2065 | */ | ||
2066 | mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; | ||
2067 | /* Page fully mapped - let IO run! */ | ||
2068 | err = mpage_submit_page(mpd, page); | ||
2069 | if (err < 0) { | ||
2070 | pagevec_release(&pvec); | ||
2071 | return err; | ||
2072 | } | ||
2073 | start++; | ||
2074 | } | ||
2075 | pagevec_release(&pvec); | ||
2076 | } | ||
2077 | /* Extent fully mapped and matches the page boundary. We are done. */ | ||
2078 | mpd->map.m_len = 0; | ||
2079 | mpd->map.m_flags = 0; | ||
2080 | return 0; | ||
2081 | } | ||
2082 | |||
2083 | static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | ||
2084 | { | ||
2085 | struct inode *inode = mpd->inode; | ||
2086 | struct ext4_map_blocks *map = &mpd->map; | ||
2087 | int get_blocks_flags; | ||
2088 | int err; | ||
2089 | |||
2090 | trace_ext4_da_write_pages_extent(inode, map); | ||
2255 | /* | 2091 | /* |
2256 | * With non-extent format the journal credit needed to | 2092 | * Call ext4_map_blocks() to allocate any delayed allocation blocks, or |
2257 | * insert nrblocks contiguous blocks depends on the | 2093 | * to convert an uninitialized extent to be initialized (in the case |
2258 | * number of contiguous blocks. So we will limit the | 2094 | * where we have written into one or more preallocated blocks). It is |
2259 | * number of contiguous blocks to a sane value | 2095 | * possible that we're going to need more metadata blocks than |
2096 | * previously reserved. However we must not fail because we're in | ||
2097 | * writeback and there is nothing we can do about it so it might result | ||
2098 | * in data loss. So use reserved blocks to allocate metadata if | ||
2099 | * possible. | ||
2100 | * | ||
2101 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks | ||
2102 | * in question are delalloc blocks. This affects functions in many | ||
2103 | * different parts of the allocation call path. This flag exists | ||
2104 | * primarily because we don't want to change *many* call functions, so | ||
2105 | * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag | ||
2106 | * once the inode's allocation semaphore is taken. | ||
2260 | */ | 2107 | */ |
2261 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && | 2108 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
2262 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2109 | EXT4_GET_BLOCKS_METADATA_NOFAIL; |
2263 | max_blocks = EXT4_MAX_TRANS_DATA; | 2110 | if (ext4_should_dioread_nolock(inode)) |
2111 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
2112 | if (map->m_flags & (1 << BH_Delay)) | ||
2113 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
2264 | 2114 | ||
2265 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2115 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); |
2116 | if (err < 0) | ||
2117 | return err; | ||
2118 | if (map->m_flags & EXT4_MAP_UNINIT) { | ||
2119 | if (!mpd->io_submit.io_end->handle && | ||
2120 | ext4_handle_valid(handle)) { | ||
2121 | mpd->io_submit.io_end->handle = handle->h_rsv_handle; | ||
2122 | handle->h_rsv_handle = NULL; | ||
2123 | } | ||
2124 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); | ||
2125 | } | ||
2126 | |||
2127 | BUG_ON(map->m_len == 0); | ||
2128 | if (map->m_flags & EXT4_MAP_NEW) { | ||
2129 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
2130 | int i; | ||
2131 | |||
2132 | for (i = 0; i < map->m_len; i++) | ||
2133 | unmap_underlying_metadata(bdev, map->m_pblk + i); | ||
2134 | } | ||
2135 | return 0; | ||
2266 | } | 2136 | } |
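The flag composition in mpage_map_one_extent() can be sketched on its own. The bit values below are stand-ins, not the real EXT4_GET_BLOCKS_* constants from ext4.h; only the shape of the logic is meant to match:

#include <stdbool.h>
#include <stdio.h>

#define GB_CREATE		0x01	/* stand-in values for illustration */
#define GB_METADATA_NOFAIL	0x02
#define GB_IO_CREATE_EXT	0x04
#define GB_DELALLOC_RESERVE	0x08

int main(void)
{
	bool dioread_nolock = true;	/* ext4_should_dioread_nolock() said yes */
	bool delayed = true;		/* map->m_flags had BH_Delay set */
	int flags = GB_CREATE | GB_METADATA_NOFAIL;	/* always set */

	if (dioread_nolock)
		flags |= GB_IO_CREATE_EXT;	/* allocate as unwritten extent */
	if (delayed)
		flags |= GB_DELALLOC_RESERVE;	/* blocks were delalloc-reserved */

	printf("get_blocks flags: 0x%x\n", flags);	/* prints 0xf */
	return 0;
}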
2267 | 2137 | ||
2268 | /* | 2138 | /* |
2269 | * write_cache_pages_da - walk the list of dirty pages of the given | 2139 | * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length |
2270 | * address space and accumulate pages that need writing, and call | 2140 | * mpd->len and submit pages underlying it for IO |
2271 | * mpage_da_map_and_submit to map a single contiguous memory region | 2141 | * |
2272 | * and then write them. | 2142 | * @handle - handle for journal operations |
2143 | * @mpd - extent to map | ||
2144 | * | ||
2145 | * The function maps the extent starting at mpd->lblk of length mpd->len. If it is | ||
2146 | * delayed, blocks are allocated; if it is unwritten, we may need to convert | ||
2147 | * them to initialized or split the described range from a larger unwritten | ||
2148 | * extent. Note that we need not map all of the described range since allocation | ||
2149 | * can return fewer blocks or the range is covered by more unwritten extents. We | ||
2150 | * cannot map more because we are limited by reserved transaction credits. On | ||
2151 | * the other hand we always make sure that the last touched page is fully | ||
2152 | * mapped so that it can be written out (and thus forward progress is | ||
2153 | * guaranteed). After mapping we submit all mapped pages for IO. | ||
2273 | */ | 2154 | */ |
2274 | static int write_cache_pages_da(handle_t *handle, | 2155 | static int mpage_map_and_submit_extent(handle_t *handle, |
2275 | struct address_space *mapping, | 2156 | struct mpage_da_data *mpd, |
2276 | struct writeback_control *wbc, | 2157 | bool *give_up_on_write) |
2277 | struct mpage_da_data *mpd, | ||
2278 | pgoff_t *done_index) | ||
2279 | { | 2158 | { |
2280 | struct buffer_head *bh, *head; | 2159 | struct inode *inode = mpd->inode; |
2281 | struct inode *inode = mapping->host; | 2160 | struct ext4_map_blocks *map = &mpd->map; |
2282 | struct pagevec pvec; | 2161 | int err; |
2283 | unsigned int nr_pages; | 2162 | loff_t disksize; |
2284 | sector_t logical; | ||
2285 | pgoff_t index, end; | ||
2286 | long nr_to_write = wbc->nr_to_write; | ||
2287 | int i, tag, ret = 0; | ||
2288 | |||
2289 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
2290 | mpd->wbc = wbc; | ||
2291 | mpd->inode = inode; | ||
2292 | pagevec_init(&pvec, 0); | ||
2293 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2294 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2295 | 2163 | ||
2296 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2164 | mpd->io_submit.io_end->offset = |
2165 | ((loff_t)map->m_lblk) << inode->i_blkbits; | ||
2166 | while (map->m_len) { | ||
2167 | err = mpage_map_one_extent(handle, mpd); | ||
2168 | if (err < 0) { | ||
2169 | struct super_block *sb = inode->i_sb; | ||
2170 | |||
2171 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | ||
2172 | goto invalidate_dirty_pages; | ||
2173 | /* | ||
2174 | * Let the upper layers retry transient errors. | ||
2175 | * In the case of ENOSPC, if ext4_count_free_clusters() | ||
2176 | * is non-zero, a commit should free up blocks. | ||
2177 | */ | ||
2178 | if ((err == -ENOMEM) || | ||
2179 | (err == -ENOSPC && ext4_count_free_clusters(sb))) | ||
2180 | return err; | ||
2181 | ext4_msg(sb, KERN_CRIT, | ||
2182 | "Delayed block allocation failed for " | ||
2183 | "inode %lu at logical offset %llu with" | ||
2184 | " max blocks %u with error %d", | ||
2185 | inode->i_ino, | ||
2186 | (unsigned long long)map->m_lblk, | ||
2187 | (unsigned)map->m_len, -err); | ||
2188 | ext4_msg(sb, KERN_CRIT, | ||
2189 | "This should not happen!! Data will " | ||
2190 | "be lost\n"); | ||
2191 | if (err == -ENOSPC) | ||
2192 | ext4_print_free_blocks(inode); | ||
2193 | invalidate_dirty_pages: | ||
2194 | *give_up_on_write = true; | ||
2195 | return err; | ||
2196 | } | ||
2197 | /* | ||
2198 | * Update buffer state, submit mapped pages, and get us new | ||
2199 | * extent to map | ||
2200 | */ | ||
2201 | err = mpage_map_and_submit_buffers(mpd); | ||
2202 | if (err < 0) | ||
2203 | return err; | ||
2204 | } | ||
2205 | |||
2206 | /* Update on-disk size after IO is submitted */ | ||
2207 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | ||
2208 | if (disksize > i_size_read(inode)) | ||
2209 | disksize = i_size_read(inode); | ||
2210 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2211 | int err2; | ||
2212 | |||
2213 | ext4_update_i_disksize(inode, disksize); | ||
2214 | err2 = ext4_mark_inode_dirty(handle, inode); | ||
2215 | if (err2) | ||
2216 | ext4_error(inode->i_sb, | ||
2217 | "Failed to mark inode %lu dirty", | ||
2218 | inode->i_ino); | ||
2219 | if (!err) | ||
2220 | err = err2; | ||
2221 | } | ||
2222 | return err; | ||
2223 | } | ||
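The i_disksize update at the end of mpage_map_and_submit_extent() advances the on-disk size to the start of the first unprocessed page, clamped to the in-memory size. A sketch with invented values:

#include <stdio.h>

int main(void)
{
	long long page_size = 4096;	/* PAGE_CACHE_SIZE (assumed) */
	unsigned long first_page = 3;	/* mpd->first_page after the loop */
	long long i_size = 10000;	/* i_size_read(inode) */
	long long i_disksize = 8192;	/* EXT4_I(inode)->i_disksize */

	long long disksize = (long long)first_page * page_size;	/* 12288 */
	if (disksize > i_size)
		disksize = i_size;	/* clamp to 10000 */
	if (disksize > i_disksize)
		i_disksize = disksize;	/* advance the on-disk size */

	printf("new i_disksize = %lld\n", i_disksize);	/* prints 10000 */
	return 0;
}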
2224 | |||
2225 | /* | ||
2226 | * Calculate the total number of credits to reserve for one writepages | ||
2227 | * iteration. This is called from ext4_writepages(). We map an extent of | ||
2228 | * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping | ||
2229 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + | ||
2230 | * bpp - 1 blocks in bpp different extents. | ||
2231 | */ | ||
2232 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | ||
2233 | { | ||
2234 | int bpp = ext4_journal_blocks_per_page(inode); | ||
2235 | |||
2236 | return ext4_meta_trans_blocks(inode, | ||
2237 | MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); | ||
2238 | } | ||
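Worked numbers for the credit sizing, assuming 4096-byte pages, 1024-byte blocks, and taking MAX_WRITEPAGES_EXTENT_LEN as 2048 (the constant is defined elsewhere in inode.c; the value here is only for the example):

#include <stdio.h>

#define MAX_WRITEPAGES_EXTENT_LEN 2048	/* assumed value for the example */

int main(void)
{
	int page_shift = 12, blkbits = 10;
	int bpp = 1 << (page_shift - blkbits);	/* blocks per page = 4 */
	/* the extent plus the tail of the last partial page */
	int max_blocks = MAX_WRITEPAGES_EXTENT_LEN + bpp - 1;	/* 2051 */

	printf("bpp=%d, blocks to budget credits for: %d\n", bpp, max_blocks);
	return 0;
}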
2239 | |||
2240 | /* | ||
2241 | * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages | ||
2242 | * and underlying extent to map | ||
2243 | * | ||
2244 | * @mpd - where to look for pages | ||
2245 | * | ||
2246 | * Walk dirty pages in the mapping. If they are fully mapped, submit them for | ||
2247 | * IO immediately. When we find a page which isn't mapped we start accumulating | ||
2248 | * extent of buffers underlying these pages that needs mapping (formed by | ||
2249 | * either delayed or unwritten buffers). We also lock the pages containing | ||
2250 | * these buffers. The extent found is returned in @mpd structure (starting at | ||
2251 | * mpd->lblk with length mpd->len blocks). | ||
2252 | * | ||
2253 | * Note that this function can attach bios to one io_end structure which are | ||
2254 | * neither logically nor physically contiguous. Although it may seem like an | ||
2255 | * unnecessary complication, it is actually inevitable in blocksize < pagesize | ||
2256 | * case as we need to track IO to all buffers underlying a page in one io_end. | ||
2257 | */ | ||
2258 | static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | ||
2259 | { | ||
2260 | struct address_space *mapping = mpd->inode->i_mapping; | ||
2261 | struct pagevec pvec; | ||
2262 | unsigned int nr_pages; | ||
2263 | pgoff_t index = mpd->first_page; | ||
2264 | pgoff_t end = mpd->last_page; | ||
2265 | int tag; | ||
2266 | int i, err = 0; | ||
2267 | int blkbits = mpd->inode->i_blkbits; | ||
2268 | ext4_lblk_t lblk; | ||
2269 | struct buffer_head *head; | ||
2270 | |||
2271 | if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) | ||
2297 | tag = PAGECACHE_TAG_TOWRITE; | 2272 | tag = PAGECACHE_TAG_TOWRITE; |
2298 | else | 2273 | else |
2299 | tag = PAGECACHE_TAG_DIRTY; | 2274 | tag = PAGECACHE_TAG_DIRTY; |
2300 | 2275 | ||
2301 | *done_index = index; | 2276 | pagevec_init(&pvec, 0); |
2277 | mpd->map.m_len = 0; | ||
2278 | mpd->next_page = index; | ||
2302 | while (index <= end) { | 2279 | while (index <= end) { |
2303 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2280 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2304 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2281 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2305 | if (nr_pages == 0) | 2282 | if (nr_pages == 0) |
2306 | return 0; | 2283 | goto out; |
2307 | 2284 | ||
2308 | for (i = 0; i < nr_pages; i++) { | 2285 | for (i = 0; i < nr_pages; i++) { |
2309 | struct page *page = pvec.pages[i]; | 2286 | struct page *page = pvec.pages[i]; |
@@ -2318,31 +2295,21 @@ static int write_cache_pages_da(handle_t *handle, | |||
2318 | if (page->index > end) | 2295 | if (page->index > end) |
2319 | goto out; | 2296 | goto out; |
2320 | 2297 | ||
2321 | *done_index = page->index + 1; | 2298 | /* If we can't merge this page, we are done. */ |
2322 | 2299 | if (mpd->map.m_len > 0 && mpd->next_page != page->index) | |
2323 | /* | 2300 | goto out; |
2324 | * If we can't merge this page, and we have | ||
2325 | * accumulated a contiguous region, write it | ||
2326 | */ | ||
2327 | if ((mpd->next_page != page->index) && | ||
2328 | (mpd->next_page != mpd->first_page)) { | ||
2329 | mpage_da_map_and_submit(mpd); | ||
2330 | goto ret_extent_tail; | ||
2331 | } | ||
2332 | 2301 | ||
2333 | lock_page(page); | 2302 | lock_page(page); |
2334 | |||
2335 | /* | 2303 | /* |
2336 | * If the page is no longer dirty, or its | 2304 | * If the page is no longer dirty, or its mapping no |
2337 | * mapping no longer corresponds to inode we | 2305 | * longer corresponds to inode we are writing (which |
2338 | * are writing (which means it has been | 2306 | * means it has been truncated or invalidated), or the |
2339 | * truncated or invalidated), or the page is | 2307 | * page is already under writeback and we are not doing |
2340 | * already under writeback and we are not | 2308 | * a data integrity writeback, skip the page |
2341 | * doing a data integrity writeback, skip the page | ||
2342 | */ | 2309 | */ |
2343 | if (!PageDirty(page) || | 2310 | if (!PageDirty(page) || |
2344 | (PageWriteback(page) && | 2311 | (PageWriteback(page) && |
2345 | (wbc->sync_mode == WB_SYNC_NONE)) || | 2312 | (mpd->wbc->sync_mode == WB_SYNC_NONE)) || |
2346 | unlikely(page->mapping != mapping)) { | 2313 | unlikely(page->mapping != mapping)) { |
2347 | unlock_page(page); | 2314 | unlock_page(page); |
2348 | continue; | 2315 | continue; |
@@ -2351,106 +2318,70 @@ static int write_cache_pages_da(handle_t *handle, | |||
2351 | wait_on_page_writeback(page); | 2318 | wait_on_page_writeback(page); |
2352 | BUG_ON(PageWriteback(page)); | 2319 | BUG_ON(PageWriteback(page)); |
2353 | 2320 | ||
2354 | /* | 2321 | if (mpd->map.m_len == 0) |
2355 | * If we have inline data and arrive here, it means that | ||
2356 | * we will soon create the block for the 1st page, so | ||
2357 | * we'd better clear the inline data here. | ||
2358 | */ | ||
2359 | if (ext4_has_inline_data(inode)) { | ||
2360 | BUG_ON(ext4_test_inode_state(inode, | ||
2361 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2362 | ext4_destroy_inline_data(handle, inode); | ||
2363 | } | ||
2364 | |||
2365 | if (mpd->next_page != page->index) | ||
2366 | mpd->first_page = page->index; | 2322 | mpd->first_page = page->index; |
2367 | mpd->next_page = page->index + 1; | 2323 | mpd->next_page = page->index + 1; |
2368 | logical = (sector_t) page->index << | ||
2369 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2370 | |||
2371 | /* Add all dirty buffers to mpd */ | 2324 | /* Add all dirty buffers to mpd */ |
2325 | lblk = ((ext4_lblk_t)page->index) << | ||
2326 | (PAGE_CACHE_SHIFT - blkbits); | ||
2372 | head = page_buffers(page); | 2327 | head = page_buffers(page); |
2373 | bh = head; | 2328 | if (!add_page_bufs_to_extent(mpd, head, head, lblk)) |
2374 | do { | 2329 | goto out; |
2375 | BUG_ON(buffer_locked(bh)); | 2330 | /* So far everything mapped? Submit the page for IO. */ |
2376 | /* | 2331 | if (mpd->map.m_len == 0) { |
2377 | * We need to try to allocate unmapped blocks | 2332 | err = mpage_submit_page(mpd, page); |
2378 | * in the same page. Otherwise we won't make | 2333 | if (err < 0) |
2379 | * progress with the page in ext4_writepage | ||
2380 | */ | ||
2381 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2382 | mpage_add_bh_to_extent(mpd, logical, | ||
2383 | bh->b_state); | ||
2384 | if (mpd->io_done) | ||
2385 | goto ret_extent_tail; | ||
2386 | } else if (buffer_dirty(bh) && | ||
2387 | buffer_mapped(bh)) { | ||
2388 | /* | ||
2389 | * mapped dirty buffer. We need to | ||
2390 | * update the b_state because we look | ||
2391 | * at b_state in mpage_da_map_blocks. | ||
2392 | * We don't update b_size because if we | ||
2393 | * find an unmapped buffer_head later | ||
2394 | * we need to use the b_state flag of | ||
2395 | * that buffer_head. | ||
2396 | */ | ||
2397 | if (mpd->b_size == 0) | ||
2398 | mpd->b_state = | ||
2399 | bh->b_state & BH_FLAGS; | ||
2400 | } | ||
2401 | logical++; | ||
2402 | } while ((bh = bh->b_this_page) != head); | ||
2403 | |||
2404 | if (nr_to_write > 0) { | ||
2405 | nr_to_write--; | ||
2406 | if (nr_to_write == 0 && | ||
2407 | wbc->sync_mode == WB_SYNC_NONE) | ||
2408 | /* | ||
2409 | * We stop writing back only if we are | ||
2410 | * not doing integrity sync. In case of | ||
2411 | * integrity sync we have to keep going | ||
2412 | * because someone may be concurrently | ||
2413 | * dirtying pages, and we might have | ||
2414 | * synced a lot of newly appeared dirty | ||
2415 | * pages, but have not synced all of the | ||
2416 | * old dirty pages. | ||
2417 | */ | ||
2418 | goto out; | 2334 | goto out; |
2419 | } | 2335 | } |
2336 | |||
2337 | /* | ||
2338 | * Accumulated enough dirty pages? This doesn't apply | ||
2339 | * to WB_SYNC_ALL mode. For integrity sync we have to | ||
2340 | * keep going because someone may be concurrently | ||
2341 | * dirtying pages, and we might have synced a lot of | ||
2342 | * newly appeared dirty pages, but have not synced all | ||
2343 | * of the old dirty pages. | ||
2344 | */ | ||
2345 | if (mpd->wbc->sync_mode == WB_SYNC_NONE && | ||
2346 | mpd->next_page - mpd->first_page >= | ||
2347 | mpd->wbc->nr_to_write) | ||
2348 | goto out; | ||
2420 | } | 2349 | } |
2421 | pagevec_release(&pvec); | 2350 | pagevec_release(&pvec); |
2422 | cond_resched(); | 2351 | cond_resched(); |
2423 | } | 2352 | } |
2424 | return 0; | 2353 | return 0; |
2425 | ret_extent_tail: | ||
2426 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2427 | out: | 2354 | out: |
2428 | pagevec_release(&pvec); | 2355 | pagevec_release(&pvec); |
2429 | cond_resched(); | 2356 | return err; |
2430 | return ret; | ||
2431 | } | 2357 | } |
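The two conditions that end the page scan in mpage_prepare_extent_to_map() can be sketched with made-up values; note the nr_to_write check is skipped for integrity writeback, as the comment above explains:

#include <stdio.h>

int main(void)
{
	unsigned long map_len = 8;	/* blocks accumulated so far */
	unsigned long first_page = 30;	/* mpd->first_page */
	unsigned long next_page = 42;	/* page expected to extend the extent */
	unsigned long page_index = 44;	/* page the tagged lookup returned */
	long nr_to_write = 16;		/* wbc->nr_to_write */
	int integrity_sync = 0;		/* WB_SYNC_NONE */

	/* Stop 1: a non-contiguous page ends the extent being built */
	if (map_len > 0 && next_page != page_index)
		printf("page %lu breaks contiguity, stop the scan\n", page_index);

	/* Stop 2: for non-integrity writeback, enough pages were taken */
	if (!integrity_sync &&
	    next_page - first_page >= (unsigned long)nr_to_write)
		printf("nr_to_write budget reached, stop the scan\n");
	return 0;
}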
2432 | 2358 | ||
2359 | static int __writepage(struct page *page, struct writeback_control *wbc, | ||
2360 | void *data) | ||
2361 | { | ||
2362 | struct address_space *mapping = data; | ||
2363 | int ret = ext4_writepage(page, wbc); | ||
2364 | mapping_set_error(mapping, ret); | ||
2365 | return ret; | ||
2366 | } | ||
2433 | 2367 | ||
2434 | static int ext4_da_writepages(struct address_space *mapping, | 2368 | static int ext4_writepages(struct address_space *mapping, |
2435 | struct writeback_control *wbc) | 2369 | struct writeback_control *wbc) |
2436 | { | 2370 | { |
2437 | pgoff_t index; | 2371 | pgoff_t writeback_index = 0; |
2372 | long nr_to_write = wbc->nr_to_write; | ||
2438 | int range_whole = 0; | 2373 | int range_whole = 0; |
2374 | int cycled = 1; | ||
2439 | handle_t *handle = NULL; | 2375 | handle_t *handle = NULL; |
2440 | struct mpage_da_data mpd; | 2376 | struct mpage_da_data mpd; |
2441 | struct inode *inode = mapping->host; | 2377 | struct inode *inode = mapping->host; |
2442 | int pages_written = 0; | 2378 | int needed_blocks, rsv_blocks = 0, ret = 0; |
2443 | unsigned int max_pages; | ||
2444 | int range_cyclic, cycled = 1, io_done = 0; | ||
2445 | int needed_blocks, ret = 0; | ||
2446 | long desired_nr_to_write, nr_to_writebump = 0; | ||
2447 | loff_t range_start = wbc->range_start; | ||
2448 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2379 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2449 | pgoff_t done_index = 0; | 2380 | bool done; |
2450 | pgoff_t end; | ||
2451 | struct blk_plug plug; | 2381 | struct blk_plug plug; |
2382 | bool give_up_on_write = false; | ||
2452 | 2383 | ||
2453 | trace_ext4_da_writepages(inode, wbc); | 2384 | trace_ext4_writepages(inode, wbc); |
2454 | 2385 | ||
2455 | /* | 2386 | /* |
2456 | * No pages to write? This is mainly a kludge to avoid starting | 2387 | * No pages to write? This is mainly a kludge to avoid starting |
@@ -2460,164 +2391,165 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2460 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 2391 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2461 | return 0; | 2392 | return 0; |
2462 | 2393 | ||
2394 | if (ext4_should_journal_data(inode)) { | ||
2395 | struct blk_plug plug; | ||
2396 | int ret; | ||
2397 | |||
2398 | blk_start_plug(&plug); | ||
2399 | ret = write_cache_pages(mapping, wbc, __writepage, mapping); | ||
2400 | blk_finish_plug(&plug); | ||
2401 | return ret; | ||
2402 | } | ||
2403 | |||
2463 | /* | 2404 | /* |
2464 | * If the filesystem has aborted, it is read-only, so return | 2405 | * If the filesystem has aborted, it is read-only, so return |
2465 | * right away instead of dumping stack traces later on that | 2406 | * right away instead of dumping stack traces later on that |
2466 | * will obscure the real source of the problem. We test | 2407 | * will obscure the real source of the problem. We test |
2467 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because | 2408 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because |
2468 | * the latter could be true if the filesystem is mounted | 2409 | * the latter could be true if the filesystem is mounted |
2469 | * read-only, and in that case, ext4_da_writepages should | 2410 | * read-only, and in that case, ext4_writepages should |
2470 | * *never* be called, so if that ever happens, we would want | 2411 | * *never* be called, so if that ever happens, we would want |
2471 | * the stack trace. | 2412 | * the stack trace. |
2472 | */ | 2413 | */ |
2473 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) | 2414 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
2474 | return -EROFS; | 2415 | return -EROFS; |
2475 | 2416 | ||
2417 | if (ext4_should_dioread_nolock(inode)) { | ||
2418 | /* | ||
2419 | * We may need to convert up to one extent per block in | ||
2420 | * the page and we may dirty the inode. | ||
2421 | */ | ||
2422 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); | ||
2423 | } | ||
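The reservation sizing above, in user-space form: one credit per block of a page (each may need an unwritten-to-written conversion) plus one for the inode itself. 4096-byte pages and 1024-byte blocks assumed:

#include <stdio.h>

int main(void)
{
	int page_size = 4096, blkbits = 10;
	int rsv_blocks = 1 + (page_size >> blkbits);	/* 1 + 4 = 5 */

	printf("reserved credits for extent conversion: %d\n", rsv_blocks);
	return 0;
}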
2424 | |||
2425 | /* | ||
2426 | * If we have inline data and arrive here, it means that | ||
2427 | * we will soon create the block for the 1st page, so | ||
2428 | * we'd better clear the inline data here. | ||
2429 | */ | ||
2430 | if (ext4_has_inline_data(inode)) { | ||
2431 | /* Just inode will be modified... */ | ||
2432 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | ||
2433 | if (IS_ERR(handle)) { | ||
2434 | ret = PTR_ERR(handle); | ||
2435 | goto out_writepages; | ||
2436 | } | ||
2437 | BUG_ON(ext4_test_inode_state(inode, | ||
2438 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2439 | ext4_destroy_inline_data(handle, inode); | ||
2440 | ext4_journal_stop(handle); | ||
2441 | } | ||
2442 | |||
2476 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2443 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2477 | range_whole = 1; | 2444 | range_whole = 1; |
2478 | 2445 | ||
2479 | range_cyclic = wbc->range_cyclic; | ||
2480 | if (wbc->range_cyclic) { | 2446 | if (wbc->range_cyclic) { |
2481 | index = mapping->writeback_index; | 2447 | writeback_index = mapping->writeback_index; |
2482 | if (index) | 2448 | if (writeback_index) |
2483 | cycled = 0; | 2449 | cycled = 0; |
2484 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2450 | mpd.first_page = writeback_index; |
2485 | wbc->range_end = LLONG_MAX; | 2451 | mpd.last_page = -1; |
2486 | wbc->range_cyclic = 0; | ||
2487 | end = -1; | ||
2488 | } else { | 2452 | } else { |
2489 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2453 | mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; |
2490 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2454 | mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; |
2491 | } | ||
2492 | |||
2493 | /* | ||
2494 | * This works around two forms of stupidity. The first is in | ||
2495 | * the writeback code, which caps the maximum number of pages | ||
2496 | * written to be 1024 pages. This is wrong on multiple | ||
2497 | * levels; different architectures have a different page size, | ||
2498 | * which changes the maximum amount of data which gets | ||
2499 | * written. Secondly, 4 megabytes is way too small. XFS | ||
2500 | * forces this value to be 16 megabytes by multiplying | ||
2501 | * nr_to_write parameter by four, and then relies on its | ||
2502 | * allocator to allocate larger extents to make them | ||
2503 | * contiguous. Unfortunately this brings us to the second | ||
2504 | * stupidity, which is that ext4's mballoc code only allocates | ||
2505 | * at most 2048 blocks. So we force contiguous writes up to | ||
2506 | * the number of dirty blocks in the inode, or | ||
2507 | * sbi->max_writeback_mb_bump whichever is smaller. | ||
2508 | */ | ||
2509 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | ||
2510 | if (!range_cyclic && range_whole) { | ||
2511 | if (wbc->nr_to_write == LONG_MAX) | ||
2512 | desired_nr_to_write = wbc->nr_to_write; | ||
2513 | else | ||
2514 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2515 | } else | ||
2516 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | ||
2517 | max_pages); | ||
2518 | if (desired_nr_to_write > max_pages) | ||
2519 | desired_nr_to_write = max_pages; | ||
2520 | |||
2521 | if (wbc->nr_to_write < desired_nr_to_write) { | ||
2522 | nr_to_writebump = desired_nr_to_write - wbc->nr_to_write; | ||
2523 | wbc->nr_to_write = desired_nr_to_write; | ||
2524 | } | 2455 | } |
2525 | 2456 | ||
2457 | mpd.inode = inode; | ||
2458 | mpd.wbc = wbc; | ||
2459 | ext4_io_submit_init(&mpd.io_submit, wbc); | ||
2526 | retry: | 2460 | retry: |
2527 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2461 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2528 | tag_pages_for_writeback(mapping, index, end); | 2462 | tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); |
2529 | 2463 | done = false; | |
2530 | blk_start_plug(&plug); | 2464 | blk_start_plug(&plug); |
2531 | while (!ret && wbc->nr_to_write > 0) { | 2465 | while (!done && mpd.first_page <= mpd.last_page) { |
2466 | /* For each extent of pages we use new io_end */ | ||
2467 | mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); | ||
2468 | if (!mpd.io_submit.io_end) { | ||
2469 | ret = -ENOMEM; | ||
2470 | break; | ||
2471 | } | ||
2532 | 2472 | ||
2533 | /* | 2473 | /* |
2534 | * we insert one extent at a time. So we need | 2474 | * We have two constraints: We find one extent to map and we |
2535 | * credit needed for single extent allocation. | 2475 | * must always write out whole page (makes a difference when |
2536 | * journalled mode is currently not supported | 2476 | * blocksize < pagesize) so that we don't block on IO when we |
2537 | * by delalloc | 2477 | * try to write out the rest of the page. Journalled mode is |
2478 | * not supported by delalloc. | ||
2538 | */ | 2479 | */ |
2539 | BUG_ON(ext4_should_journal_data(inode)); | 2480 | BUG_ON(ext4_should_journal_data(inode)); |
2540 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | 2481 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2541 | 2482 | ||
2542 | /* start a new transaction*/ | 2483 | /* start a new transaction */ |
2543 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 2484 | handle = ext4_journal_start_with_reserve(inode, |
2544 | needed_blocks); | 2485 | EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); |
2545 | if (IS_ERR(handle)) { | 2486 | if (IS_ERR(handle)) { |
2546 | ret = PTR_ERR(handle); | 2487 | ret = PTR_ERR(handle); |
2547 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2488 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2548 | "%ld pages, ino %lu; err %d", __func__, | 2489 | "%ld pages, ino %lu; err %d", __func__, |
2549 | wbc->nr_to_write, inode->i_ino, ret); | 2490 | wbc->nr_to_write, inode->i_ino, ret); |
2550 | blk_finish_plug(&plug); | 2491 | /* Release allocated io_end */ |
2551 | goto out_writepages; | 2492 | ext4_put_io_end(mpd.io_submit.io_end); |
2493 | break; | ||
2552 | } | 2494 | } |
2553 | 2495 | ||
2554 | /* | 2496 | trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); |
2555 | * Now call write_cache_pages_da() to find the next | 2497 | ret = mpage_prepare_extent_to_map(&mpd); |
2556 | * contiguous region of logical blocks that need | 2498 | if (!ret) { |
2557 | * blocks to be allocated by ext4 and submit them. | 2499 | if (mpd.map.m_len) |
2558 | */ | 2500 | ret = mpage_map_and_submit_extent(handle, &mpd, |
2559 | ret = write_cache_pages_da(handle, mapping, | 2501 | &give_up_on_write); |
2560 | wbc, &mpd, &done_index); | 2502 | else { |
2561 | /* | 2503 | /* |
2562 | * If we have a contiguous extent of pages and we | 2504 | * We scanned the whole range (or exhausted |
2563 | * haven't done the I/O yet, map the blocks and submit | 2505 | * nr_to_write), submitted what was mapped and |
2564 | * them for I/O. | 2506 | * didn't find anything needing mapping. We are |
2565 | */ | 2507 | * done. |
2566 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2508 | */ |
2567 | mpage_da_map_and_submit(&mpd); | 2509 | done = true; |
2568 | ret = MPAGE_DA_EXTENT_TAIL; | 2510 | } |
2569 | } | 2511 | } |
2570 | trace_ext4_da_write_pages(inode, &mpd); | ||
2571 | wbc->nr_to_write -= mpd.pages_written; | ||
2572 | |||
2573 | ext4_journal_stop(handle); | 2512 | ext4_journal_stop(handle); |
2574 | 2513 | /* Submit prepared bio */ | |
2575 | if ((mpd.retval == -ENOSPC) && sbi->s_journal) { | 2514 | ext4_io_submit(&mpd.io_submit); |
2576 | /* commit the transaction which would | 2515 | /* Unlock pages we didn't use */ |
2516 | mpage_release_unused_pages(&mpd, give_up_on_write); | ||
2517 | /* Drop our io_end reference we got from init */ | ||
2518 | ext4_put_io_end(mpd.io_submit.io_end); | ||
2519 | |||
2520 | if (ret == -ENOSPC && sbi->s_journal) { | ||
2521 | /* | ||
2522 | * Commit the transaction which would | ||
2577 | * free blocks released in the transaction | 2523 | * free blocks released in the transaction |
2578 | * and try again | 2524 | * and try again |
2579 | */ | 2525 | */ |
2580 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2526 | jbd2_journal_force_commit_nested(sbi->s_journal); |
2581 | ret = 0; | 2527 | ret = 0; |
2582 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2528 | continue; |
2583 | /* | 2529 | } |
2584 | * Got one extent now try with rest of the pages. | 2530 | /* Fatal error - ENOMEM, EIO... */ |
2585 | * If mpd.retval is set -EIO, journal is aborted. | 2531 | if (ret) |
2586 | * So we don't need to write any more. | ||
2587 | */ | ||
2588 | pages_written += mpd.pages_written; | ||
2589 | ret = mpd.retval; | ||
2590 | io_done = 1; | ||
2591 | } else if (wbc->nr_to_write) | ||
2592 | /* | ||
2593 | * There is no more writeout needed | ||
2594 | * or we requested for a noblocking writeout | ||
2595 | * and we found the device congested | ||
2596 | */ | ||
2597 | break; | 2532 | break; |
2598 | } | 2533 | } |
2599 | blk_finish_plug(&plug); | 2534 | blk_finish_plug(&plug); |
2600 | if (!io_done && !cycled) { | 2535 | if (!ret && !cycled) { |
2601 | cycled = 1; | 2536 | cycled = 1; |
2602 | index = 0; | 2537 | mpd.last_page = writeback_index - 1; |
2603 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2538 | mpd.first_page = 0; |
2604 | wbc->range_end = mapping->writeback_index - 1; | ||
2605 | goto retry; | 2539 | goto retry; |
2606 | } | 2540 | } |
2607 | 2541 | ||
2608 | /* Update index */ | 2542 | /* Update index */ |
2609 | wbc->range_cyclic = range_cyclic; | ||
2610 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 2543 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
2611 | /* | 2544 | /* |
2612 | * set the writeback_index so that range_cyclic | 2545 | * Set the writeback_index so that range_cyclic |
2613 | * mode will write it back later | 2546 | * mode will write it back later |
2614 | */ | 2547 | */ |
2615 | mapping->writeback_index = done_index; | 2548 | mapping->writeback_index = mpd.first_page; |
2616 | 2549 | ||
2617 | out_writepages: | 2550 | out_writepages: |
2618 | wbc->nr_to_write -= nr_to_writebump; | 2551 | trace_ext4_writepages_result(inode, wbc, ret, |
2619 | wbc->range_start = range_start; | 2552 | nr_to_write - wbc->nr_to_write); |
2620 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | ||
2621 | return ret; | 2553 | return ret; |
2622 | } | 2554 | } |
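The range_cyclic handling in ext4_writepages() amounts to at most two passes over the file. A control-flow sketch with an invented resume point:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned long writeback_index = 120;	/* where the last pass stopped */
	unsigned long first_page, last_page;
	bool cycled = (writeback_index == 0);

	/* First pass: from the resume point to the end of the file */
	first_page = writeback_index;
	last_page = (unsigned long)-1;
	printf("pass 1: pages %lu..EOF\n", first_page);

	if (!cycled) {
		/* Second pass wraps around to the start of the file */
		first_page = 0;
		last_page = writeback_index - 1;
		printf("pass 2: pages %lu..%lu\n", first_page, last_page);
	}
	return 0;
}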
2623 | 2555 | ||
@@ -2829,7 +2761,8 @@ static int ext4_da_write_end(struct file *file, | |||
2829 | return ret ? ret : copied; | 2761 | return ret ? ret : copied; |
2830 | } | 2762 | } |
2831 | 2763 | ||
2832 | static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | 2764 | static void ext4_da_invalidatepage(struct page *page, unsigned int offset, |
2765 | unsigned int length) | ||
2833 | { | 2766 | { |
2834 | /* | 2767 | /* |
2835 | * Drop reserved blocks | 2768 | * Drop reserved blocks |
@@ -2838,10 +2771,10 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | |||
2838 | if (!page_has_buffers(page)) | 2771 | if (!page_has_buffers(page)) |
2839 | goto out; | 2772 | goto out; |
2840 | 2773 | ||
2841 | ext4_da_page_release_reservation(page, offset); | 2774 | ext4_da_page_release_reservation(page, offset, length); |
2842 | 2775 | ||
2843 | out: | 2776 | out: |
2844 | ext4_invalidatepage(page, offset); | 2777 | ext4_invalidatepage(page, offset, length); |
2845 | 2778 | ||
2846 | return; | 2779 | return; |
2847 | } | 2780 | } |
@@ -2864,7 +2797,7 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
2864 | * laptop_mode, not even desirable). However, to do otherwise | 2797 | * laptop_mode, not even desirable). However, to do otherwise |
2865 | * would require replicating code paths in: | 2798 | * would require replicating code paths in: |
2866 | * | 2799 | * |
2867 | * ext4_da_writepages() -> | 2800 | * ext4_writepages() -> |
2868 | * write_cache_pages() ---> (via passed in callback function) | 2801 | * write_cache_pages() ---> (via passed in callback function) |
2869 | * __mpage_da_writepage() --> | 2802 | * __mpage_da_writepage() --> |
2870 | * mpage_add_bh_to_extent() | 2803 | * mpage_add_bh_to_extent() |
@@ -2989,37 +2922,40 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
2989 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 2922 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
2990 | } | 2923 | } |
2991 | 2924 | ||
2992 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 2925 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
2926 | unsigned int length) | ||
2993 | { | 2927 | { |
2994 | trace_ext4_invalidatepage(page, offset); | 2928 | trace_ext4_invalidatepage(page, offset, length); |
2995 | 2929 | ||
2996 | /* No journalling happens on data buffers when this function is used */ | 2930 | /* No journalling happens on data buffers when this function is used */ |
2997 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); | 2931 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); |
2998 | 2932 | ||
2999 | block_invalidatepage(page, offset); | 2933 | block_invalidatepage(page, offset, length); |
3000 | } | 2934 | } |
3001 | 2935 | ||
3002 | static int __ext4_journalled_invalidatepage(struct page *page, | 2936 | static int __ext4_journalled_invalidatepage(struct page *page, |
3003 | unsigned long offset) | 2937 | unsigned int offset, |
2938 | unsigned int length) | ||
3004 | { | 2939 | { |
3005 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2940 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3006 | 2941 | ||
3007 | trace_ext4_journalled_invalidatepage(page, offset); | 2942 | trace_ext4_journalled_invalidatepage(page, offset, length); |
3008 | 2943 | ||
3009 | /* | 2944 | /* |
3010 | * If it's a full truncate we just forget about the pending dirtying | 2945 | * If it's a full truncate we just forget about the pending dirtying |
3011 | */ | 2946 | */ |
3012 | if (offset == 0) | 2947 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
3013 | ClearPageChecked(page); | 2948 | ClearPageChecked(page); |
3014 | 2949 | ||
3015 | return jbd2_journal_invalidatepage(journal, page, offset); | 2950 | return jbd2_journal_invalidatepage(journal, page, offset, length); |
3016 | } | 2951 | } |
3017 | 2952 | ||
3018 | /* Wrapper for aops... */ | 2953 | /* Wrapper for aops... */ |
3019 | static void ext4_journalled_invalidatepage(struct page *page, | 2954 | static void ext4_journalled_invalidatepage(struct page *page, |
3020 | unsigned long offset) | 2955 | unsigned int offset, |
2956 | unsigned int length) | ||
3021 | { | 2957 | { |
3022 | WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | 2958 | WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); |
3023 | } | 2959 | } |
3024 | 2960 | ||
3025 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2961 | static int ext4_releasepage(struct page *page, gfp_t wait) |
@@ -3067,9 +3003,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3067 | struct inode *inode = file_inode(iocb->ki_filp); | 3003 | struct inode *inode = file_inode(iocb->ki_filp); |
3068 | ext4_io_end_t *io_end = iocb->private; | 3004 | ext4_io_end_t *io_end = iocb->private; |
3069 | 3005 | ||
3070 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3006 | /* if not async direct IO just return */ |
3071 | if (!io_end || !size) | 3007 | if (!io_end) { |
3072 | goto out; | 3008 | inode_dio_done(inode); |
3009 | if (is_async) | ||
3010 | aio_complete(iocb, ret, 0); | ||
3011 | return; | ||
3012 | } | ||
3073 | 3013 | ||
3074 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 3014 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
3075 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", | 3015 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
@@ -3077,25 +3017,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3077 | size); | 3017 | size); |
3078 | 3018 | ||
3079 | iocb->private = NULL; | 3019 | iocb->private = NULL; |
3080 | |||
3081 | /* if not aio dio with unwritten extents, just free io and return */ | ||
3082 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
3083 | ext4_free_io_end(io_end); | ||
3084 | out: | ||
3085 | inode_dio_done(inode); | ||
3086 | if (is_async) | ||
3087 | aio_complete(iocb, ret, 0); | ||
3088 | return; | ||
3089 | } | ||
3090 | |||
3091 | io_end->offset = offset; | 3020 | io_end->offset = offset; |
3092 | io_end->size = size; | 3021 | io_end->size = size; |
3093 | if (is_async) { | 3022 | if (is_async) { |
3094 | io_end->iocb = iocb; | 3023 | io_end->iocb = iocb; |
3095 | io_end->result = ret; | 3024 | io_end->result = ret; |
3096 | } | 3025 | } |
3097 | 3026 | ext4_put_io_end_defer(io_end); | |
3098 | ext4_add_complete_io(io_end); | ||
3099 | } | 3027 | } |
3100 | 3028 | ||
3101 | /* | 3029 | /* |
@@ -3129,6 +3057,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3129 | get_block_t *get_block_func = NULL; | 3057 | get_block_t *get_block_func = NULL; |
3130 | int dio_flags = 0; | 3058 | int dio_flags = 0; |
3131 | loff_t final_size = offset + count; | 3059 | loff_t final_size = offset + count; |
3060 | ext4_io_end_t *io_end = NULL; | ||
3132 | 3061 | ||
3133 | /* Use the old path for reads and writes beyond i_size. */ | 3062 | /* Use the old path for reads and writes beyond i_size. */ |
3134 | if (rw != WRITE || final_size > inode->i_size) | 3063 | if (rw != WRITE || final_size > inode->i_size) |
@@ -3136,11 +3065,18 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3136 | 3065 | ||
3137 | BUG_ON(iocb->private == NULL); | 3066 | BUG_ON(iocb->private == NULL); |
3138 | 3067 | ||
3068 | /* | ||
3069 | * Make all waiters for direct IO properly wait also for extent | ||
3070 | * conversion. This also disallows race between truncate() and | ||
3071 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. | ||
3072 | */ | ||
3073 | if (rw == WRITE) | ||
3074 | atomic_inc(&inode->i_dio_count); | ||
3075 | |||
3139 | /* If we do an overwrite dio, i_mutex locking can be released */ | 3076 | ||
3140 | overwrite = *((int *)iocb->private); | 3077 | overwrite = *((int *)iocb->private); |
3141 | 3078 | ||
3142 | if (overwrite) { | 3079 | if (overwrite) { |
3143 | atomic_inc(&inode->i_dio_count); | ||
3144 | down_read(&EXT4_I(inode)->i_data_sem); | 3080 | down_read(&EXT4_I(inode)->i_data_sem); |
3145 | mutex_unlock(&inode->i_mutex); | 3081 | mutex_unlock(&inode->i_mutex); |
3146 | } | 3082 | } |
@@ -3167,13 +3103,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3167 | iocb->private = NULL; | 3103 | iocb->private = NULL; |
3168 | ext4_inode_aio_set(inode, NULL); | 3104 | ext4_inode_aio_set(inode, NULL); |
3169 | if (!is_sync_kiocb(iocb)) { | 3105 | if (!is_sync_kiocb(iocb)) { |
3170 | ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); | 3106 | io_end = ext4_init_io_end(inode, GFP_NOFS); |
3171 | if (!io_end) { | 3107 | if (!io_end) { |
3172 | ret = -ENOMEM; | 3108 | ret = -ENOMEM; |
3173 | goto retake_lock; | 3109 | goto retake_lock; |
3174 | } | 3110 | } |
3175 | io_end->flag |= EXT4_IO_END_DIRECT; | 3111 | io_end->flag |= EXT4_IO_END_DIRECT; |
3176 | iocb->private = io_end; | 3112 | /* |
3113 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() | ||
3114 | */ | ||
3115 | iocb->private = ext4_get_io_end(io_end); | ||
3177 | /* | 3116 | /* |
3178 | * we save the io structure for current async direct | 3117 | * we save the io structure for current async direct |
3179 | * IO, so that later ext4_map_blocks() could flag the | 3118 | * IO, so that later ext4_map_blocks() could flag the |
@@ -3197,33 +3136,42 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3197 | NULL, | 3136 | NULL, |
3198 | dio_flags); | 3137 | dio_flags); |
3199 | 3138 | ||
3200 | if (iocb->private) | ||
3201 | ext4_inode_aio_set(inode, NULL); | ||
3202 | /* | 3139 | /* |
3203 | * The io_end structure takes a reference to the inode, that | 3140 | * Put our reference to io_end. This can free the io_end structure e.g. |
3204 | * structure needs to be destroyed and the reference to the | 3141 | * in sync IO case or in case of error. It can even perform extent |
3205 | * inode need to be dropped, when IO is complete, even with 0 | 3142 | * conversion if all bios we submitted finished before we got here. |
3206 | * byte write, or failed. | 3143 | * Note that in that case iocb->private can be already set to NULL |
3207 | * | 3144 | * here. |
3208 | * In the successful AIO DIO case, the io_end structure will | ||
3209 | * be destroyed and the reference to the inode will be dropped | ||
3210 | * after the end_io call back function is called. | ||
3211 | * | ||
3212 | * In the case there is 0 byte write, or error case, since VFS | ||
3213 | * direct IO won't invoke the end_io call back function, we | ||
3214 | * need to free the end_io structure here. | ||
3215 | */ | 3145 | */ |
3216 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3146 | if (io_end) { |
3217 | ext4_free_io_end(iocb->private); | 3147 | ext4_inode_aio_set(inode, NULL); |
3218 | iocb->private = NULL; | 3148 | ext4_put_io_end(io_end); |
3219 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | 3149 | /* |
3150 | * When no IO was submitted ext4_end_io_dio() was not | ||
3151 | * called so we have to put iocb's reference. | ||
3152 | */ | ||
3153 | if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { | ||
3154 | WARN_ON(iocb->private != io_end); | ||
3155 | WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
3156 | WARN_ON(io_end->iocb); | ||
3157 | /* | ||
3158 | * Generic code already did inode_dio_done() so we | ||
3159 | * have to clear EXT4_IO_END_DIRECT to not do it for | ||
3160 | * the second time. | ||
3161 | */ | ||
3162 | io_end->flag = 0; | ||
3163 | ext4_put_io_end(io_end); | ||
3164 | iocb->private = NULL; | ||
3165 | } | ||
3166 | } | ||
3167 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3220 | EXT4_STATE_DIO_UNWRITTEN)) { | 3168 | EXT4_STATE_DIO_UNWRITTEN)) { |
3221 | int err; | 3169 | int err; |
3222 | /* | 3170 | /* |
3223 | * for non AIO case, since the IO is already | 3171 | * for non AIO case, since the IO is already |
3224 | * completed, we could do the conversion right here | 3172 | * completed, we could do the conversion right here |
3225 | */ | 3173 | */ |
3226 | err = ext4_convert_unwritten_extents(inode, | 3174 | err = ext4_convert_unwritten_extents(NULL, inode, |
3227 | offset, ret); | 3175 | offset, ret); |
3228 | if (err < 0) | 3176 | if (err < 0) |
3229 | ret = err; | 3177 | ret = err; |
@@ -3231,9 +3179,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3231 | } | 3179 | } |
3232 | 3180 | ||
3233 | retake_lock: | 3181 | retake_lock: |
3182 | if (rw == WRITE) | ||
3183 | inode_dio_done(inode); | ||
3234 | /* take i_mutex locking again if we do an overwrite dio */ | 3184 | /* take i_mutex locking again if we do an overwrite dio */ |
3235 | if (overwrite) { | 3185 | if (overwrite) { |
3236 | inode_dio_done(inode); | ||
3237 | up_read(&EXT4_I(inode)->i_data_sem); | 3186 | up_read(&EXT4_I(inode)->i_data_sem); |
3238 | mutex_lock(&inode->i_mutex); | 3187 | mutex_lock(&inode->i_mutex); |
3239 | } | 3188 | } |
@@ -3292,6 +3241,7 @@ static const struct address_space_operations ext4_aops = { | |||
3292 | .readpage = ext4_readpage, | 3241 | .readpage = ext4_readpage, |
3293 | .readpages = ext4_readpages, | 3242 | .readpages = ext4_readpages, |
3294 | .writepage = ext4_writepage, | 3243 | .writepage = ext4_writepage, |
3244 | .writepages = ext4_writepages, | ||
3295 | .write_begin = ext4_write_begin, | 3245 | .write_begin = ext4_write_begin, |
3296 | .write_end = ext4_write_end, | 3246 | .write_end = ext4_write_end, |
3297 | .bmap = ext4_bmap, | 3247 | .bmap = ext4_bmap, |
@@ -3307,6 +3257,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3307 | .readpage = ext4_readpage, | 3257 | .readpage = ext4_readpage, |
3308 | .readpages = ext4_readpages, | 3258 | .readpages = ext4_readpages, |
3309 | .writepage = ext4_writepage, | 3259 | .writepage = ext4_writepage, |
3260 | .writepages = ext4_writepages, | ||
3310 | .write_begin = ext4_write_begin, | 3261 | .write_begin = ext4_write_begin, |
3311 | .write_end = ext4_journalled_write_end, | 3262 | .write_end = ext4_journalled_write_end, |
3312 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3263 | .set_page_dirty = ext4_journalled_set_page_dirty, |
@@ -3322,7 +3273,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
3322 | .readpage = ext4_readpage, | 3273 | .readpage = ext4_readpage, |
3323 | .readpages = ext4_readpages, | 3274 | .readpages = ext4_readpages, |
3324 | .writepage = ext4_writepage, | 3275 | .writepage = ext4_writepage, |
3325 | .writepages = ext4_da_writepages, | 3276 | .writepages = ext4_writepages, |
3326 | .write_begin = ext4_da_write_begin, | 3277 | .write_begin = ext4_da_write_begin, |
3327 | .write_end = ext4_da_write_end, | 3278 | .write_end = ext4_da_write_end, |
3328 | .bmap = ext4_bmap, | 3279 | .bmap = ext4_bmap, |
@@ -3355,89 +3306,56 @@ void ext4_set_aops(struct inode *inode) | |||
3355 | inode->i_mapping->a_ops = &ext4_aops; | 3306 | inode->i_mapping->a_ops = &ext4_aops; |
3356 | } | 3307 | } |
3357 | 3308 | ||
3358 | |||
3359 | /* | 3309 | /* |
3360 | * ext4_discard_partial_page_buffers() | 3310 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
3361 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | 3311 | * up to the end of the block which corresponds to `from'. |
3362 | * This function finds and locks the page containing the offset | 3312 | * This is required during truncate. We need to physically zero the tail end |
3363 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | 3313 | * of that block so it doesn't yield old data if the file is later grown. |
3364 | * Calling functions that already have the page locked should call | ||
3365 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
3366 | */ | 3314 | */ |
3367 | int ext4_discard_partial_page_buffers(handle_t *handle, | 3315 | int ext4_block_truncate_page(handle_t *handle, |
3368 | struct address_space *mapping, loff_t from, | 3316 | struct address_space *mapping, loff_t from) |
3369 | loff_t length, int flags) | ||
3370 | { | 3317 | { |
3318 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3319 | unsigned length; | ||
3320 | unsigned blocksize; | ||
3371 | struct inode *inode = mapping->host; | 3321 | struct inode *inode = mapping->host; |
3372 | struct page *page; | ||
3373 | int err = 0; | ||
3374 | 3322 | ||
3375 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3323 | blocksize = inode->i_sb->s_blocksize; |
3376 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3324 | length = blocksize - (offset & (blocksize - 1)); |
3377 | if (!page) | ||
3378 | return -ENOMEM; | ||
3379 | |||
3380 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
3381 | from, length, flags); | ||
3382 | 3325 | ||
3383 | unlock_page(page); | 3326 | return ext4_block_zero_page_range(handle, mapping, from, length); |
3384 | page_cache_release(page); | ||
3385 | return err; | ||
3386 | } | 3327 | } |
3387 | 3328 | ||
3388 | /* | 3329 | /* |
3389 | * ext4_discard_partial_page_buffers_no_lock() | 3330 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' |
3390 | * Zeros a page range of length 'length' starting from offset 'from'. | 3331 | * starting from file offset 'from'. The range to be zero'd must |
3391 | * Buffer heads that correspond to the block-aligned regions of the | 3332 | * be contained within one block. If the specified range exceeds |
3392 | * zeroed range will be unmapped. Non-block-aligned regions | 3333 | * the end of the block it will be shortened to the end of the block |
3393 | * will have the corresponding buffer head mapped if needed so that | 3334 | * that corresponds to 'from'. |
3394 | * the region of the page can be updated with the partial zero out. | ||
3395 | * | ||
3396 | * This function assumes that the page has already been locked. | ||
3397 | * The range to be discarded must be contained within the given page. | ||
3398 | * If the specified range exceeds the end of the page it will be shortened | ||
3399 | * to the end of the page that corresponds to 'from'. This function is | ||
3400 | * appropriate for updating a page and its buffer heads to be unmapped and | ||
3401 | * zeroed for blocks that have been either released, or are going to be | ||
3402 | * released. | ||
3403 | * | ||
3404 | * handle: The journal handle | ||
3405 | * inode: The files inode | ||
3406 | * page: A locked page that contains the offset "from" | ||
3407 | * from: The starting byte offset (from the beginning of the file) | ||
3408 | * to begin discarding | ||
3409 | * len: The length of bytes to discard | ||
3410 | * flags: Optional flags that may be used: | ||
3411 | * | ||
3412 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
3413 | * Only zero the regions of the page whose buffer heads | ||
3414 | * have already been unmapped. This flag is appropriate | ||
3415 | * for updating the contents of a page whose blocks may | ||
3416 | * have already been released, and we only want to zero | ||
3417 | * out the regions that correspond to those released blocks. | ||
3418 | * | ||
3419 | * Returns zero on success or negative on failure. | ||
3420 | */ | 3335 | */ |
3421 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 3336 | int ext4_block_zero_page_range(handle_t *handle, |
3422 | struct inode *inode, struct page *page, loff_t from, | 3337 | struct address_space *mapping, loff_t from, loff_t length) |
3423 | loff_t length, int flags) | ||
3424 | { | 3338 | { |
3425 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3339 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
3426 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | 3340 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3427 | unsigned int blocksize, max, pos; | 3341 | unsigned blocksize, max, pos; |
3428 | ext4_lblk_t iblock; | 3342 | ext4_lblk_t iblock; |
3343 | struct inode *inode = mapping->host; | ||
3429 | struct buffer_head *bh; | 3344 | struct buffer_head *bh; |
3345 | struct page *page; | ||
3430 | int err = 0; | 3346 | int err = 0; |
3431 | 3347 | ||
3432 | blocksize = inode->i_sb->s_blocksize; | 3348 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3433 | max = PAGE_CACHE_SIZE - offset; | 3349 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
3350 | if (!page) | ||
3351 | return -ENOMEM; | ||
3434 | 3352 | ||
3435 | if (index != page->index) | 3353 | blocksize = inode->i_sb->s_blocksize; |
3436 | return -EINVAL; | 3354 | max = blocksize - (offset & (blocksize - 1)); |
3437 | 3355 | ||
3438 | /* | 3356 | /* |
3439 | * correct length if it does not fall between | 3357 | * correct length if it does not fall between |
3440 | * 'from' and the end of the page | 3358 | * 'from' and the end of the block |
3441 | */ | 3359 | */ |
3442 | if (length > max || length < 0) | 3360 | if (length > max || length < 0) |
3443 | length = max; | 3361 | length = max; |
@@ -3455,106 +3373,91 @@ static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | |||
3455 | iblock++; | 3373 | iblock++; |
3456 | pos += blocksize; | 3374 | pos += blocksize; |
3457 | } | 3375 | } |
3458 | 3376 | if (buffer_freed(bh)) { | |
3459 | pos = offset; | 3377 | BUFFER_TRACE(bh, "freed: skip"); |
3460 | while (pos < offset + length) { | 3378 | goto unlock; |
3461 | unsigned int end_of_block, range_to_discard; | 3379 | } |
3462 | 3380 | if (!buffer_mapped(bh)) { | |
3463 | err = 0; | 3381 | BUFFER_TRACE(bh, "unmapped"); |
3464 | 3382 | ext4_get_block(inode, iblock, bh, 0); | |
3465 | /* The length of space left to zero and unmap */ | 3383 | /* unmapped? It's a hole - nothing to do */ |
3466 | range_to_discard = offset + length - pos; | ||
3467 | |||
3468 | /* The length of space until the end of the block */ | ||
3469 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
3470 | |||
3471 | /* | ||
3472 | * Do not unmap or zero past end of block | ||
3473 | * for this buffer head | ||
3474 | */ | ||
3475 | if (range_to_discard > end_of_block) | ||
3476 | range_to_discard = end_of_block; | ||
3477 | |||
3478 | |||
3479 | /* | ||
3480 | * Skip this buffer head if we are only zeroing unmapped | ||
3481 | * regions of the page | ||
3482 | */ | ||
3483 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
3484 | buffer_mapped(bh)) | ||
3485 | goto next; | ||
3486 | |||
3487 | /* If the range is block aligned, unmap */ | ||
3488 | if (range_to_discard == blocksize) { | ||
3489 | clear_buffer_dirty(bh); | ||
3490 | bh->b_bdev = NULL; | ||
3491 | clear_buffer_mapped(bh); | ||
3492 | clear_buffer_req(bh); | ||
3493 | clear_buffer_new(bh); | ||
3494 | clear_buffer_delay(bh); | ||
3495 | clear_buffer_unwritten(bh); | ||
3496 | clear_buffer_uptodate(bh); | ||
3497 | zero_user(page, pos, range_to_discard); | ||
3498 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
3499 | goto next; | ||
3500 | } | ||
3501 | |||
3502 | /* | ||
3503 | * If this block is not completely contained in the range | ||
3504 | * to be discarded, then it is not going to be released. Because | ||
3505 | * we need to keep this block, we need to make sure this part | ||
3506 | * of the page is uptodate before we modify it by writing | ||
3507 | * partial zeros on it. | ||
3508 | */ | ||
3509 | if (!buffer_mapped(bh)) { | 3384 | if (!buffer_mapped(bh)) { |
3510 | /* | 3385 | BUFFER_TRACE(bh, "still unmapped"); |
3511 | * Buffer head must be mapped before we can read | 3386 | goto unlock; |
3512 | * from the block | ||
3513 | */ | ||
3514 | BUFFER_TRACE(bh, "unmapped"); | ||
3515 | ext4_get_block(inode, iblock, bh, 0); | ||
3516 | /* unmapped? It's a hole - nothing to do */ | ||
3517 | if (!buffer_mapped(bh)) { | ||
3518 | BUFFER_TRACE(bh, "still unmapped"); | ||
3519 | goto next; | ||
3520 | } | ||
3521 | } | 3387 | } |
3388 | } | ||
3522 | 3389 | ||
3523 | /* Ok, it's mapped. Make sure it's up-to-date */ | 3390 | /* Ok, it's mapped. Make sure it's up-to-date */ |
3524 | if (PageUptodate(page)) | 3391 | if (PageUptodate(page)) |
3525 | set_buffer_uptodate(bh); | 3392 | set_buffer_uptodate(bh); |
3526 | 3393 | ||
3527 | if (!buffer_uptodate(bh)) { | 3394 | if (!buffer_uptodate(bh)) { |
3528 | err = -EIO; | 3395 | err = -EIO; |
3529 | ll_rw_block(READ, 1, &bh); | 3396 | ll_rw_block(READ, 1, &bh); |
3530 | wait_on_buffer(bh); | 3397 | wait_on_buffer(bh); |
3531 | /* Uhhuh. Read error. Complain and punt.*/ | 3398 | /* Uhhuh. Read error. Complain and punt. */ |
3532 | if (!buffer_uptodate(bh)) | 3399 | if (!buffer_uptodate(bh)) |
3533 | goto next; | 3400 | goto unlock; |
3534 | } | 3401 | } |
3402 | if (ext4_should_journal_data(inode)) { | ||
3403 | BUFFER_TRACE(bh, "get write access"); | ||
3404 | err = ext4_journal_get_write_access(handle, bh); | ||
3405 | if (err) | ||
3406 | goto unlock; | ||
3407 | } | ||
3408 | zero_user(page, offset, length); | ||
3409 | BUFFER_TRACE(bh, "zeroed end of block"); | ||
3535 | 3410 | ||
3536 | if (ext4_should_journal_data(inode)) { | 3411 | if (ext4_should_journal_data(inode)) { |
3537 | BUFFER_TRACE(bh, "get write access"); | 3412 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
3538 | err = ext4_journal_get_write_access(handle, bh); | 3413 | } else { |
3539 | if (err) | 3414 | err = 0; |
3540 | goto next; | 3415 | mark_buffer_dirty(bh); |
3541 | } | 3416 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) |
3417 | err = ext4_jbd2_file_inode(handle, inode); | ||
3418 | } | ||
3419 | |||
3420 | unlock: | ||
3421 | unlock_page(page); | ||
3422 | page_cache_release(page); | ||
3423 | return err; | ||
3424 | } | ||
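The length clamp near the top of ext4_block_zero_page_range() guarantees the zeroed range never crosses the block containing 'from'. A sketch with a 1024-byte block and invented offsets:

#include <stdio.h>

int main(void)
{
	unsigned blocksize = 1024;
	unsigned long long from = 5000;			/* file offset */
	long long length = 3000;			/* requested length */
	unsigned offset = from & (4096 - 1);		/* 904, offset in page */
	unsigned max = blocksize - (offset & (blocksize - 1));	/* 120 */

	if (length > max)
		length = max;	/* shortened to the end of block 4 (byte 5119) */

	printf("zeroing %lld bytes at offset %llu\n", length, from);
	return 0;
}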
3542 | 3425 | ||
3543 | zero_user(page, pos, range_to_discard); | 3426 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
3427 | loff_t lstart, loff_t length) | ||
3428 | { | ||
3429 | struct super_block *sb = inode->i_sb; | ||
3430 | struct address_space *mapping = inode->i_mapping; | ||
3431 | unsigned partial_start, partial_end; | ||
3432 | ext4_fsblk_t start, end; | ||
3433 | loff_t byte_end = (lstart + length - 1); | ||
3434 | int err = 0; | ||
3544 | 3435 | ||
3545 | err = 0; | 3436 | partial_start = lstart & (sb->s_blocksize - 1); |
3546 | if (ext4_should_journal_data(inode)) { | 3437 | partial_end = byte_end & (sb->s_blocksize - 1); |
3547 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
3548 | } else | ||
3549 | mark_buffer_dirty(bh); | ||
3550 | 3438 | ||
3551 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | 3439 | start = lstart >> sb->s_blocksize_bits; |
3552 | next: | 3440 | end = byte_end >> sb->s_blocksize_bits; |
3553 | bh = bh->b_this_page; | ||
3554 | iblock++; | ||
3555 | pos += range_to_discard; | ||
3556 | } | ||
3557 | 3441 | ||
3442 | /* Handle partial zero within the single block */ | ||
3443 | if (start == end && | ||
3444 | (partial_start || (partial_end != sb->s_blocksize - 1))) { | ||
3445 | err = ext4_block_zero_page_range(handle, mapping, | ||
3446 | lstart, length); | ||
3447 | return err; | ||
3448 | } | ||
3449 | /* Handle partial zero out on the start of the range */ | ||
3450 | if (partial_start) { | ||
3451 | err = ext4_block_zero_page_range(handle, mapping, | ||
3452 | lstart, sb->s_blocksize); | ||
3453 | if (err) | ||
3454 | return err; | ||
3455 | } | ||
3456 | /* Handle partial zero out on the end of the range */ | ||
3457 | if (partial_end != sb->s_blocksize - 1) | ||
3458 | err = ext4_block_zero_page_range(handle, mapping, | ||
3459 | byte_end - partial_end, | ||
3460 | partial_end + 1); | ||
3558 | return err; | 3461 | return err; |
3559 | } | 3462 | } |
3560 | 3463 | ||
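The alignment math in the new ext4_zero_partial_blocks() is easiest to verify with concrete numbers. Below is a minimal userspace sketch of the same planning logic, assuming a power-of-two block size; the function name and printf output are illustrative, not kernel code. Note the kernel passes sb->s_blocksize as the head-fragment length and relies on ext4_block_zero_page_range() to clamp at the block boundary, which comes to the same thing.

    #include <stdio.h>

    /* Mirrors the three-way case split in ext4_zero_partial_blocks(). */
    static void zero_partial_plan(unsigned long long lstart,
                                  unsigned long long length,
                                  unsigned int blocksize)
    {
            unsigned long long byte_end = lstart + length - 1;
            unsigned int partial_start = lstart & (blocksize - 1);
            unsigned int partial_end = byte_end & (blocksize - 1);
            unsigned long long start = lstart / blocksize;
            unsigned long long end = byte_end / blocksize;

            if (start == end &&
                (partial_start || partial_end != blocksize - 1)) {
                    /* Range confined to one block: a single zeroing call. */
                    printf("zero [%llu, +%llu)\n", lstart, length);
                    return;
            }
            if (partial_start)                      /* head fragment */
                    printf("zero [%llu, +%u)\n", lstart,
                           blocksize - partial_start);
            if (partial_end != blocksize - 1)       /* tail fragment */
                    printf("zero [%llu, +%u)\n", byte_end - partial_end,
                           partial_end + 1);
    }

    int main(void)
    {
            zero_partial_plan(1000, 8000, 4096); /* head 1000-4095, tail 8192-8999 */
            zero_partial_plan(1000, 3000, 4096); /* single in-block range */
            return 0;
    }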
@@ -3580,14 +3483,12 @@ int ext4_can_truncate(struct inode *inode) | |||
3580 | * Returns: 0 on success or negative on failure | 3483 | * Returns: 0 on success or negative on failure |
3581 | */ | 3484 | */ |
3582 | 3485 | ||
3583 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | 3486 | int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) |
3584 | { | 3487 | { |
3585 | struct inode *inode = file_inode(file); | ||
3586 | struct super_block *sb = inode->i_sb; | 3488 | struct super_block *sb = inode->i_sb; |
3587 | ext4_lblk_t first_block, stop_block; | 3489 | ext4_lblk_t first_block, stop_block; |
3588 | struct address_space *mapping = inode->i_mapping; | 3490 | struct address_space *mapping = inode->i_mapping; |
3589 | loff_t first_page, last_page, page_len; | 3491 | loff_t first_block_offset, last_block_offset; |
3590 | loff_t first_page_offset, last_page_offset; | ||
3591 | handle_t *handle; | 3492 | handle_t *handle; |
3592 | unsigned int credits; | 3493 | unsigned int credits; |
3593 | int ret = 0; | 3494 | int ret = 0; |
@@ -3638,23 +3539,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3638 | offset; | 3539 | offset; |
3639 | } | 3540 | } |
3640 | 3541 | ||
3641 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 3542 | first_block_offset = round_up(offset, sb->s_blocksize); |
3642 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 3543 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; |
3643 | 3544 | ||
3644 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 3545 | /* Now release the pages and zero block aligned part of pages */ |
3645 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 3546 | if (last_block_offset > first_block_offset) |
3646 | 3547 | truncate_pagecache_range(inode, first_block_offset, | |
3647 | /* Now release the pages */ | 3548 | last_block_offset); |
3648 | if (last_page_offset > first_page_offset) { | ||
3649 | truncate_pagecache_range(inode, first_page_offset, | ||
3650 | last_page_offset - 1); | ||
3651 | } | ||
3652 | 3549 | ||
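Worked example of the new block-aligned bounds, assuming a 4096-byte block size: punching offset = 1000, length = 8000 yields first_block_offset = round_up(1000, 4096) = 4096 and last_block_offset = round_down(9000, 4096) - 1 = 8191, so truncate_pagecache_range() drops exactly the fully covered block range [4096, 8191]. The partial head (bytes 1000-4095) and tail (8192-8999) are handled by the ext4_zero_partial_blocks() call added later in this function, where the old code had to reason about whole pages instead.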
3653 | /* Wait for all existing dio workers, newcomers will block on i_mutex */ | 3550 | /* Wait for all existing dio workers, newcomers will block on i_mutex */ |
3654 | ext4_inode_block_unlocked_dio(inode); | 3551 | ext4_inode_block_unlocked_dio(inode); |
3655 | ret = ext4_flush_unwritten_io(inode); | ||
3656 | if (ret) | ||
3657 | goto out_dio; | ||
3658 | inode_dio_wait(inode); | 3552 | inode_dio_wait(inode); |
3659 | 3553 | ||
3660 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3554 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
@@ -3668,66 +3562,10 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3668 | goto out_dio; | 3562 | goto out_dio; |
3669 | } | 3563 | } |
3670 | 3564 | ||
3671 | /* | 3565 | ret = ext4_zero_partial_blocks(handle, inode, offset, |
3672 | * Now we need to zero out the non-page-aligned data in the | 3566 | length); |
3673 | * pages at the start and tail of the hole, and unmap the | 3567 | if (ret) |
3674 | * buffer heads for the block aligned regions of the page that | 3568 | goto out_stop; |
3675 | * were completely zeroed. | ||
3676 | */ | ||
3677 | if (first_page > last_page) { | ||
3678 | /* | ||
3679 | * If the file space being truncated is contained | ||
3680 | * within a page just zero out and unmap the middle of | ||
3681 | * that page | ||
3682 | */ | ||
3683 | ret = ext4_discard_partial_page_buffers(handle, | ||
3684 | mapping, offset, length, 0); | ||
3685 | |||
3686 | if (ret) | ||
3687 | goto out_stop; | ||
3688 | } else { | ||
3689 | /* | ||
3690 | * zero out and unmap the partial page that contains | ||
3691 | * the start of the hole | ||
3692 | */ | ||
3693 | page_len = first_page_offset - offset; | ||
3694 | if (page_len > 0) { | ||
3695 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3696 | offset, page_len, 0); | ||
3697 | if (ret) | ||
3698 | goto out_stop; | ||
3699 | } | ||
3700 | |||
3701 | /* | ||
3702 | * zero out and unmap the partial page that contains | ||
3703 | * the end of the hole | ||
3704 | */ | ||
3705 | page_len = offset + length - last_page_offset; | ||
3706 | if (page_len > 0) { | ||
3707 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3708 | last_page_offset, page_len, 0); | ||
3709 | if (ret) | ||
3710 | goto out_stop; | ||
3711 | } | ||
3712 | } | ||
3713 | |||
3714 | /* | ||
3715 | * If i_size is contained in the last page, we need to | ||
3716 | * unmap and zero the partial page after i_size | ||
3717 | */ | ||
3718 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
3719 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
3720 | page_len = PAGE_CACHE_SIZE - | ||
3721 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
3722 | |||
3723 | if (page_len > 0) { | ||
3724 | ret = ext4_discard_partial_page_buffers(handle, | ||
3725 | mapping, inode->i_size, page_len, 0); | ||
3726 | |||
3727 | if (ret) | ||
3728 | goto out_stop; | ||
3729 | } | ||
3730 | } | ||
3731 | 3569 | ||
3732 | first_block = (offset + sb->s_blocksize - 1) >> | 3570 | first_block = (offset + sb->s_blocksize - 1) >> |
3733 | EXT4_BLOCK_SIZE_BITS(sb); | 3571 | EXT4_BLOCK_SIZE_BITS(sb); |
@@ -3803,7 +3641,6 @@ void ext4_truncate(struct inode *inode) | |||
3803 | unsigned int credits; | 3641 | unsigned int credits; |
3804 | handle_t *handle; | 3642 | handle_t *handle; |
3805 | struct address_space *mapping = inode->i_mapping; | 3643 | struct address_space *mapping = inode->i_mapping; |
3806 | loff_t page_len; | ||
3807 | 3644 | ||
3808 | /* | 3645 | /* |
3809 | * There is a possibility that we're either freeing the inode | 3646 | * There is a possibility that we're either freeing the inode |
@@ -3830,12 +3667,6 @@ void ext4_truncate(struct inode *inode) | |||
3830 | return; | 3667 | return; |
3831 | } | 3668 | } |
3832 | 3669 | ||
3833 | /* | ||
3834 | * finish any pending end_io work so we won't run the risk of | ||
3835 | * converting any truncated blocks to initialized later | ||
3836 | */ | ||
3837 | ext4_flush_unwritten_io(inode); | ||
3838 | |||
3839 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3670 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3840 | credits = ext4_writepage_trans_blocks(inode); | 3671 | credits = ext4_writepage_trans_blocks(inode); |
3841 | else | 3672 | else |
@@ -3847,14 +3678,8 @@ void ext4_truncate(struct inode *inode) | |||
3847 | return; | 3678 | return; |
3848 | } | 3679 | } |
3849 | 3680 | ||
3850 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { | 3681 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) |
3851 | page_len = PAGE_CACHE_SIZE - | 3682 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
3852 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
3853 | |||
3854 | if (ext4_discard_partial_page_buffers(handle, | ||
3855 | mapping, inode->i_size, page_len, 0)) | ||
3856 | goto out_stop; | ||
3857 | } | ||
3858 | 3683 | ||
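With numbers, assuming a 1024-byte block size and i_size = 5000: i_size & (blocksize - 1) = 904 is nonzero, so ext4_block_truncate_page() zeroes bytes 5000-5119, the tail of the last block. Blocks wholly beyond i_size are freed by the truncate itself, so the old page-granular zeroing (out to the end of the 4096-byte page) did more work than necessary.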
3859 | /* | 3684 | /* |
3860 | * We add the inode to the orphan list, so that if this | 3685 | * We add the inode to the orphan list, so that if this |
@@ -4623,7 +4448,8 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) | |||
4623 | inode->i_size >> PAGE_CACHE_SHIFT); | 4448 | inode->i_size >> PAGE_CACHE_SHIFT); |
4624 | if (!page) | 4449 | if (!page) |
4625 | return; | 4450 | return; |
4626 | ret = __ext4_journalled_invalidatepage(page, offset); | 4451 | ret = __ext4_journalled_invalidatepage(page, offset, |
4452 | PAGE_CACHE_SIZE - offset); | ||
4627 | unlock_page(page); | 4453 | unlock_page(page); |
4628 | page_cache_release(page); | 4454 | page_cache_release(page); |
4629 | if (ret != -EBUSY) | 4455 | if (ret != -EBUSY) |
@@ -4805,7 +4631,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4805 | struct kstat *stat) | 4631 | struct kstat *stat) |
4806 | { | 4632 | { |
4807 | struct inode *inode; | 4633 | struct inode *inode; |
4808 | unsigned long delalloc_blocks; | 4634 | unsigned long long delalloc_blocks; |
4809 | 4635 | ||
4810 | inode = dentry->d_inode; | 4636 | inode = dentry->d_inode; |
4811 | generic_fillattr(inode, stat); | 4637 | generic_fillattr(inode, stat); |
@@ -4823,15 +4649,16 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4823 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), | 4649 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), |
4824 | EXT4_I(inode)->i_reserved_data_blocks); | 4650 | EXT4_I(inode)->i_reserved_data_blocks); |
4825 | 4651 | ||
4826 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 4652 | stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); |
4827 | return 0; | 4653 | return 0; |
4828 | } | 4654 | } |
4829 | 4655 | ||
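Two fixes travel together in this hunk: delalloc_blocks is widened to unsigned long long, and the sector conversion becomes a single shift by (blocksize_bits - 9). For example, 8 delayed-allocation blocks at a 4 KB block size (blocksize_bits = 12) contribute 8 << 3 = 64 512-byte sectors. The old form, (blocks << 12) >> 9, computed the same value, but the intermediate left shift could overflow a 32-bit unsigned long for large delalloc counts before the right shift was applied.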
4830 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4656 | static int ext4_index_trans_blocks(struct inode *inode, int lblocks, |
4657 | int pextents) | ||
4831 | { | 4658 | { |
4832 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4659 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
4833 | return ext4_ind_trans_blocks(inode, nrblocks, chunk); | 4660 | return ext4_ind_trans_blocks(inode, lblocks); |
4834 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 4661 | return ext4_ext_index_trans_blocks(inode, pextents); |
4835 | } | 4662 | } |
4836 | 4663 | ||
4837 | /* | 4664 | /* |
@@ -4845,7 +4672,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4845 | * | 4672 | * |
4846 | * Also account for superblock, inode, quota and xattr blocks | 4673 | * Also account for superblock, inode, quota and xattr blocks |
4847 | */ | 4674 | */ |
4848 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4675 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
4676 | int pextents) | ||
4849 | { | 4677 | { |
4850 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 4678 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
4851 | int gdpblocks; | 4679 | int gdpblocks; |
@@ -4853,14 +4681,10 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4853 | int ret = 0; | 4681 | int ret = 0; |
4854 | 4682 | ||
4855 | /* | 4683 | /* |
4856 | * How many index blocks do we need to touch to modify nrblocks? | 4684 | * How many index blocks do we need to touch to map @lblocks logical blocks |
4857 | * The "Chunk" flag indicating whether the nrblocks is | 4685 | * to @pextents physical extents? |
4858 | * physically contiguous on disk | ||
4859 | * | ||
4860 | * For Direct IO and fallocate, they calls get_block to allocate | ||
4861 | * one single extent at a time, so they could set the "Chunk" flag | ||
4862 | */ | 4686 | */ |
4863 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | 4687 | idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); |
4864 | 4688 | ||
4865 | ret = idxblocks; | 4689 | ret = idxblocks; |
4866 | 4690 | ||
@@ -4868,12 +4692,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4868 | * Now let's see how many group bitmaps and group descriptors need | 4692 | * Now let's see how many group bitmaps and group descriptors need |
4869 | * to account | 4693 | * to account |
4870 | */ | 4694 | */ |
4871 | groups = idxblocks; | 4695 | groups = idxblocks + pextents; |
4872 | if (chunk) | ||
4873 | groups += 1; | ||
4874 | else | ||
4875 | groups += nrblocks; | ||
4876 | |||
4877 | gdpblocks = groups; | 4696 | gdpblocks = groups; |
4878 | if (groups > ngroups) | 4697 | if (groups > ngroups) |
4879 | groups = ngroups; | 4698 | groups = ngroups; |
@@ -4904,7 +4723,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
4904 | int bpp = ext4_journal_blocks_per_page(inode); | 4723 | int bpp = ext4_journal_blocks_per_page(inode); |
4905 | int ret; | 4724 | int ret; |
4906 | 4725 | ||
4907 | ret = ext4_meta_trans_blocks(inode, bpp, 0); | 4726 | ret = ext4_meta_trans_blocks(inode, bpp, bpp); |
4908 | 4727 | ||
4909 | /* Account for data blocks for journalled mode */ | 4728 | /* Account for data blocks for journalled mode */ |
4910 | if (ext4_should_journal_data(inode)) | 4729 | if (ext4_should_journal_data(inode)) |
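The calling convention now separates logical blocks touched (@lblocks) from physical extents allocated (@pextents). ext4_writepage_trans_blocks() therefore passes (bpp, bpp): in the worst case a page of bpp buffers (bpp = 4 for 1 KB blocks in a 4 KB page) maps bpp logical blocks into bpp distinct extents, so groups = idxblocks + pextents bounds the bitmap blocks to credit, gdpblocks follows from the same count capped at ngroups, and journalled-data mode still adds bpp data-block credits on top, as the unchanged tail of the function shows.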
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..a9ff5e5137ca 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2105,6 +2105,7 @@ repeat: | |||
2105 | group = ac->ac_g_ex.fe_group; | 2105 | group = ac->ac_g_ex.fe_group; |
2106 | 2106 | ||
2107 | for (i = 0; i < ngroups; group++, i++) { | 2107 | for (i = 0; i < ngroups; group++, i++) { |
2108 | cond_resched(); | ||
2108 | /* | 2109 | /* |
2109 | * Artificially restricted ngroups for non-extent | 2110 | * Artificially restricted ngroups for non-extent |
2110 | * files makes group > ngroups possible on first loop. | 2111 | * files makes group > ngroups possible on first loop. |
@@ -4405,17 +4406,20 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4405 | repeat: | 4406 | repeat: |
4406 | /* allocate space in core */ | 4407 | /* allocate space in core */ |
4407 | *errp = ext4_mb_regular_allocator(ac); | 4408 | *errp = ext4_mb_regular_allocator(ac); |
4408 | if (*errp) { | 4409 | if (*errp) |
4409 | ext4_discard_allocated_blocks(ac); | 4410 | goto discard_and_exit; |
4410 | goto errout; | ||
4411 | } | ||
4412 | 4411 | ||
4413 | /* as we've just preallocated more space than | 4412 | /* as we've just preallocated more space than |
4414 | * user requested orinally, we store allocated | 4413 | * user requested originally, we store allocated |
4415 | * space in a special descriptor */ | 4414 | * space in a special descriptor */ |
4416 | if (ac->ac_status == AC_STATUS_FOUND && | 4415 | if (ac->ac_status == AC_STATUS_FOUND && |
4417 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) | 4416 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
4418 | ext4_mb_new_preallocation(ac); | 4417 | *errp = ext4_mb_new_preallocation(ac); |
4418 | if (*errp) { | ||
4419 | discard_and_exit: | ||
4420 | ext4_discard_allocated_blocks(ac); | ||
4421 | goto errout; | ||
4422 | } | ||
4419 | } | 4423 | } |
4420 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4424 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4421 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); | 4425 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); |
@@ -4612,10 +4616,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4612 | BUG_ON(bh && (count > 1)); | 4616 | BUG_ON(bh && (count > 1)); |
4613 | 4617 | ||
4614 | for (i = 0; i < count; i++) { | 4618 | for (i = 0; i < count; i++) { |
4619 | cond_resched(); | ||
4615 | if (!bh) | 4620 | if (!bh) |
4616 | tbh = sb_find_get_block(inode->i_sb, | 4621 | tbh = sb_find_get_block(inode->i_sb, |
4617 | block + i); | 4622 | block + i); |
4618 | if (unlikely(!tbh)) | 4623 | if (!tbh) |
4619 | continue; | 4624 | continue; |
4620 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4625 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4621 | inode, tbh, block + i); | 4626 | inode, tbh, block + i); |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3dcbf364022f..e86dddbd8296 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -912,7 +912,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
912 | struct page *pagep[2] = {NULL, NULL}; | 912 | struct page *pagep[2] = {NULL, NULL}; |
913 | handle_t *handle; | 913 | handle_t *handle; |
914 | ext4_lblk_t orig_blk_offset; | 914 | ext4_lblk_t orig_blk_offset; |
915 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | ||
916 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 915 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
917 | unsigned int w_flags = 0; | 916 | unsigned int w_flags = 0; |
918 | unsigned int tmp_data_size, data_size, replaced_size; | 917 | unsigned int tmp_data_size, data_size, replaced_size; |
@@ -940,8 +939,6 @@ again: | |||
940 | orig_blk_offset = orig_page_offset * blocks_per_page + | 939 | orig_blk_offset = orig_page_offset * blocks_per_page + |
941 | data_offset_in_page; | 940 | data_offset_in_page; |
942 | 941 | ||
943 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | ||
944 | |||
945 | /* Calculate data_size */ | 942 | /* Calculate data_size */ |
946 | if ((orig_blk_offset + block_len_in_page - 1) == | 943 | if ((orig_blk_offset + block_len_in_page - 1) == |
947 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 944 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6653fc35ecb7..ab2f6dc44b3a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -918,11 +918,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
918 | bh->b_data, bh->b_size, | 918 | bh->b_data, bh->b_size, |
919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | 919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) |
920 | + ((char *)de - bh->b_data))) { | 920 | + ((char *)de - bh->b_data))) { |
921 | /* On error, skip the f_pos to the next block. */ | 921 | /* silently ignore the rest of the block */ |
922 | dir_file->f_pos = (dir_file->f_pos | | 922 | break; |
923 | (dir->i_sb->s_blocksize - 1)) + 1; | ||
924 | brelse(bh); | ||
925 | return count; | ||
926 | } | 923 | } |
927 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 924 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
928 | if ((hinfo->hash < start_hash) || | 925 | if ((hinfo->hash < start_hash) || |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4acf1f78881b..48786cdb5e6c 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -46,46 +46,121 @@ void ext4_exit_pageio(void) | |||
46 | } | 46 | } |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * This function is called by ext4_evict_inode() to make sure there is | 49 | * Print a buffer I/O error compatible with fs/buffer.c. This |
50 | * no more pending I/O completion work left to do. | 50 | * provides compatibility with dmesg scrapers that look for a specific |
51 | * buffer I/O error message. We really need a unified error reporting | ||
52 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
53 | * probably not going to happen in my lifetime, due to LKML politics... | ||
51 | */ | 54 | */ |
52 | void ext4_ioend_shutdown(struct inode *inode) | 55 | static void buffer_io_error(struct buffer_head *bh) |
53 | { | 56 | { |
54 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 57 | char b[BDEVNAME_SIZE]; |
58 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
59 | bdevname(bh->b_bdev, b), | ||
60 | (unsigned long long)bh->b_blocknr); | ||
61 | } | ||
55 | 62 | ||
56 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 63 | static void ext4_finish_bio(struct bio *bio) |
57 | /* | 64 | { |
58 | * We need to make sure the work structure is finished being | 65 | int i; |
59 | * used before we let the inode get destroyed. | 66 | int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); |
60 | */ | 67 | |
61 | if (work_pending(&EXT4_I(inode)->i_unwritten_work)) | 68 | for (i = 0; i < bio->bi_vcnt; i++) { |
62 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | 69 | struct bio_vec *bvec = &bio->bi_io_vec[i]; |
70 | struct page *page = bvec->bv_page; | ||
71 | struct buffer_head *bh, *head; | ||
72 | unsigned bio_start = bvec->bv_offset; | ||
73 | unsigned bio_end = bio_start + bvec->bv_len; | ||
74 | unsigned under_io = 0; | ||
75 | unsigned long flags; | ||
76 | |||
77 | if (!page) | ||
78 | continue; | ||
79 | |||
80 | if (error) { | ||
81 | SetPageError(page); | ||
82 | set_bit(AS_EIO, &page->mapping->flags); | ||
83 | } | ||
84 | bh = head = page_buffers(page); | ||
85 | /* | ||
86 | * We check all buffers in the page under BH_Uptodate_Lock | ||
87 | * to avoid races with other end io clearing async_write flags | ||
88 | */ | ||
89 | local_irq_save(flags); | ||
90 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | ||
91 | do { | ||
92 | if (bh_offset(bh) < bio_start || | ||
93 | bh_offset(bh) + bh->b_size > bio_end) { | ||
94 | if (buffer_async_write(bh)) | ||
95 | under_io++; | ||
96 | continue; | ||
97 | } | ||
98 | clear_buffer_async_write(bh); | ||
99 | if (error) | ||
100 | buffer_io_error(bh); | ||
101 | } while ((bh = bh->b_this_page) != head); | ||
102 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
103 | local_irq_restore(flags); | ||
104 | if (!under_io) | ||
105 | end_page_writeback(page); | ||
106 | } | ||
63 | } | 107 | } |
64 | 108 | ||
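The under_io accounting is what makes split submission safe: if a 4 KB page holds four 1 KB buffers written by two different bios, the first completion clears async_write on its own two buffers but finds the other two still flagged, so under_io is nonzero and writeback stays pending; only the completion that finds no async_write buffer outside its own byte range calls end_page_writeback(). The BH_Uptodate_Lock bit spinlock is taken with interrupts disabled because completions can run in interrupt context and would otherwise deadlock against this section on the same CPU.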
65 | void ext4_free_io_end(ext4_io_end_t *io) | 109 | static void ext4_release_io_end(ext4_io_end_t *io_end) |
66 | { | 110 | { |
67 | BUG_ON(!io); | 111 | struct bio *bio, *next_bio; |
68 | BUG_ON(!list_empty(&io->list)); | 112 | |
69 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | 113 | BUG_ON(!list_empty(&io_end->list)); |
114 | BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
115 | WARN_ON(io_end->handle); | ||
70 | 116 | ||
71 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) | 117 | if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) |
72 | wake_up_all(ext4_ioend_wq(io->inode)); | 118 | wake_up_all(ext4_ioend_wq(io_end->inode)); |
73 | kmem_cache_free(io_end_cachep, io); | 119 | |
120 | for (bio = io_end->bio; bio; bio = next_bio) { | ||
121 | next_bio = bio->bi_private; | ||
122 | ext4_finish_bio(bio); | ||
123 | bio_put(bio); | ||
124 | } | ||
125 | if (io_end->flag & EXT4_IO_END_DIRECT) | ||
126 | inode_dio_done(io_end->inode); | ||
127 | if (io_end->iocb) | ||
128 | aio_complete(io_end->iocb, io_end->result, 0); | ||
129 | kmem_cache_free(io_end_cachep, io_end); | ||
74 | } | 130 | } |
75 | 131 | ||
76 | /* check a range of space and convert unwritten extents to written. */ | 132 | static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) |
133 | { | ||
134 | struct inode *inode = io_end->inode; | ||
135 | |||
136 | io_end->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
137 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
138 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) | ||
139 | wake_up_all(ext4_ioend_wq(inode)); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Check a range of space and convert unwritten extents to written. Note that | ||
144 | * we are protected from truncate touching the same part of the extent tree by the | ||
145 | * fact that truncate code waits for all DIO to finish (thus exclusion from | ||
146 | * direct IO is achieved) and also waits for PageWriteback bits. Thus we | ||
147 | * cannot get to ext4_ext_truncate() before all IOs overlapping that range are | ||
148 | * completed (happens from ext4_free_ioend()). | ||
149 | */ | ||
77 | static int ext4_end_io(ext4_io_end_t *io) | 150 | static int ext4_end_io(ext4_io_end_t *io) |
78 | { | 151 | { |
79 | struct inode *inode = io->inode; | 152 | struct inode *inode = io->inode; |
80 | loff_t offset = io->offset; | 153 | loff_t offset = io->offset; |
81 | ssize_t size = io->size; | 154 | ssize_t size = io->size; |
155 | handle_t *handle = io->handle; | ||
82 | int ret = 0; | 156 | int ret = 0; |
83 | 157 | ||
84 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 158 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
85 | "list->prev 0x%p\n", | 159 | "list->prev 0x%p\n", |
86 | io, inode->i_ino, io->list.next, io->list.prev); | 160 | io, inode->i_ino, io->list.next, io->list.prev); |
87 | 161 | ||
88 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 162 | io->handle = NULL; /* Following call will use up the handle */ |
163 | ret = ext4_convert_unwritten_extents(handle, inode, offset, size); | ||
89 | if (ret < 0) { | 164 | if (ret < 0) { |
90 | ext4_msg(inode->i_sb, KERN_EMERG, | 165 | ext4_msg(inode->i_sb, KERN_EMERG, |
91 | "failed to convert unwritten extents to written " | 166 | "failed to convert unwritten extents to written " |
@@ -93,30 +168,22 @@ static int ext4_end_io(ext4_io_end_t *io) | |||
93 | "(inode %lu, offset %llu, size %zd, error %d)", | 168 | "(inode %lu, offset %llu, size %zd, error %d)", |
94 | inode->i_ino, offset, size, ret); | 169 | inode->i_ino, offset, size, ret); |
95 | } | 170 | } |
96 | /* Wake up anyone waiting on unwritten extent conversion */ | 171 | ext4_clear_io_unwritten_flag(io); |
97 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) | 172 | ext4_release_io_end(io); |
98 | wake_up_all(ext4_ioend_wq(inode)); | ||
99 | if (io->flag & EXT4_IO_END_DIRECT) | ||
100 | inode_dio_done(inode); | ||
101 | if (io->iocb) | ||
102 | aio_complete(io->iocb, io->result, 0); | ||
103 | return ret; | 173 | return ret; |
104 | } | 174 | } |
105 | 175 | ||
106 | static void dump_completed_IO(struct inode *inode) | 176 | static void dump_completed_IO(struct inode *inode, struct list_head *head) |
107 | { | 177 | { |
108 | #ifdef EXT4FS_DEBUG | 178 | #ifdef EXT4FS_DEBUG |
109 | struct list_head *cur, *before, *after; | 179 | struct list_head *cur, *before, *after; |
110 | ext4_io_end_t *io, *io0, *io1; | 180 | ext4_io_end_t *io, *io0, *io1; |
111 | 181 | ||
112 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { | 182 | if (list_empty(head)) |
113 | ext4_debug("inode %lu completed_io list is empty\n", | ||
114 | inode->i_ino); | ||
115 | return; | 183 | return; |
116 | } | ||
117 | 184 | ||
118 | ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); | 185 | ext4_debug("Dump inode %lu completed io list\n", inode->i_ino); |
119 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { | 186 | list_for_each_entry(io, head, list) { |
120 | cur = &io->list; | 187 | cur = &io->list; |
121 | before = cur->prev; | 188 | before = cur->prev; |
122 | io0 = container_of(before, ext4_io_end_t, list); | 189 | io0 = container_of(before, ext4_io_end_t, list); |
@@ -130,23 +197,30 @@ static void dump_completed_IO(struct inode *inode) | |||
130 | } | 197 | } |
131 | 198 | ||
132 | /* Add the io_end to per-inode completed end_io list. */ | 199 | /* Add the io_end to per-inode completed end_io list. */ |
133 | void ext4_add_complete_io(ext4_io_end_t *io_end) | 200 | static void ext4_add_complete_io(ext4_io_end_t *io_end) |
134 | { | 201 | { |
135 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); | 202 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); |
136 | struct workqueue_struct *wq; | 203 | struct workqueue_struct *wq; |
137 | unsigned long flags; | 204 | unsigned long flags; |
138 | 205 | ||
139 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); | 206 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); |
140 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
141 | |||
142 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 207 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
143 | if (list_empty(&ei->i_completed_io_list)) | 208 | if (io_end->handle) { |
144 | queue_work(wq, &ei->i_unwritten_work); | 209 | wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; |
145 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 210 | if (list_empty(&ei->i_rsv_conversion_list)) |
211 | queue_work(wq, &ei->i_rsv_conversion_work); | ||
212 | list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); | ||
213 | } else { | ||
214 | wq = EXT4_SB(io_end->inode->i_sb)->unrsv_conversion_wq; | ||
215 | if (list_empty(&ei->i_unrsv_conversion_list)) | ||
216 | queue_work(wq, &ei->i_unrsv_conversion_work); | ||
217 | list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list); | ||
218 | } | ||
146 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 219 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
147 | } | 220 | } |
148 | 221 | ||
149 | static int ext4_do_flush_completed_IO(struct inode *inode) | 222 | static int ext4_do_flush_completed_IO(struct inode *inode, |
223 | struct list_head *head) | ||
150 | { | 224 | { |
151 | ext4_io_end_t *io; | 225 | ext4_io_end_t *io; |
152 | struct list_head unwritten; | 226 | struct list_head unwritten; |
@@ -155,8 +229,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
155 | int err, ret = 0; | 229 | int err, ret = 0; |
156 | 230 | ||
157 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 231 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
158 | dump_completed_IO(inode); | 232 | dump_completed_IO(inode, head); |
159 | list_replace_init(&ei->i_completed_io_list, &unwritten); | 233 | list_replace_init(head, &unwritten); |
160 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 234 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
161 | 235 | ||
162 | while (!list_empty(&unwritten)) { | 236 | while (!list_empty(&unwritten)) { |
@@ -167,30 +241,25 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
167 | err = ext4_end_io(io); | 241 | err = ext4_end_io(io); |
168 | if (unlikely(!ret && err)) | 242 | if (unlikely(!ret && err)) |
169 | ret = err; | 243 | ret = err; |
170 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
171 | ext4_free_io_end(io); | ||
172 | } | 244 | } |
173 | return ret; | 245 | return ret; |
174 | } | 246 | } |
175 | 247 | ||
176 | /* | 248 | /* |
177 | * work on completed aio dio IO, to convert unwritten extents to extents | 249 | * work on completed IO, to convert unwritten extents to extents |
178 | */ | 250 | */ |
179 | void ext4_end_io_work(struct work_struct *work) | 251 | void ext4_end_io_rsv_work(struct work_struct *work) |
180 | { | 252 | { |
181 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, | 253 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
182 | i_unwritten_work); | 254 | i_rsv_conversion_work); |
183 | ext4_do_flush_completed_IO(&ei->vfs_inode); | 255 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list); |
184 | } | 256 | } |
185 | 257 | ||
186 | int ext4_flush_unwritten_io(struct inode *inode) | 258 | void ext4_end_io_unrsv_work(struct work_struct *work) |
187 | { | 259 | { |
188 | int ret; | 260 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
189 | WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && | 261 | i_unrsv_conversion_work); |
190 | !(inode->i_state & I_FREEING)); | 262 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_unrsv_conversion_list); |
191 | ret = ext4_do_flush_completed_IO(inode); | ||
192 | ext4_unwritten_wait(inode); | ||
193 | return ret; | ||
194 | } | 263 | } |
195 | 264 | ||
196 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | 265 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) |
@@ -200,83 +269,70 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | |||
200 | atomic_inc(&EXT4_I(inode)->i_ioend_count); | 269 | atomic_inc(&EXT4_I(inode)->i_ioend_count); |
201 | io->inode = inode; | 270 | io->inode = inode; |
202 | INIT_LIST_HEAD(&io->list); | 271 | INIT_LIST_HEAD(&io->list); |
272 | atomic_set(&io->count, 1); | ||
203 | } | 273 | } |
204 | return io; | 274 | return io; |
205 | } | 275 | } |
206 | 276 | ||
207 | /* | 277 | void ext4_put_io_end_defer(ext4_io_end_t *io_end) |
208 | * Print a buffer I/O error compatible with fs/buffer.c. This | ||
209 | * provides compatibility with dmesg scrapers that look for a specific | ||
210 | * buffer I/O error message. We really need a unified error reporting | ||
211 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
212 | * probably not going to happen in my lifetime, due to LKML politics... | ||
213 | */ | ||
214 | static void buffer_io_error(struct buffer_head *bh) | ||
215 | { | 278 | { |
216 | char b[BDEVNAME_SIZE]; | 279 | if (atomic_dec_and_test(&io_end->count)) { |
217 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | 280 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { |
218 | bdevname(bh->b_bdev, b), | 281 | ext4_release_io_end(io_end); |
219 | (unsigned long long)bh->b_blocknr); | 282 | return; |
283 | } | ||
284 | ext4_add_complete_io(io_end); | ||
285 | } | ||
286 | } | ||
287 | |||
288 | int ext4_put_io_end(ext4_io_end_t *io_end) | ||
289 | { | ||
290 | int err = 0; | ||
291 | |||
292 | if (atomic_dec_and_test(&io_end->count)) { | ||
293 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { | ||
294 | err = ext4_convert_unwritten_extents(io_end->handle, | ||
295 | io_end->inode, io_end->offset, | ||
296 | io_end->size); | ||
297 | io_end->handle = NULL; | ||
298 | ext4_clear_io_unwritten_flag(io_end); | ||
299 | } | ||
300 | ext4_release_io_end(io_end); | ||
301 | } | ||
302 | return err; | ||
303 | } | ||
304 | |||
305 | ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) | ||
306 | { | ||
307 | atomic_inc(&io_end->count); | ||
308 | return io_end; | ||
220 | } | 309 | } |
221 | 310 | ||
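Taken together, the new helpers make the io_end lifetime explicit: ext4_init_io_end() returns it with count = 1 (the submitter's reference), each bio takes an extra reference through ext4_get_io_end(), bio completion drops its reference via ext4_put_io_end_defer(), and the submitter's closing ext4_put_io_end() converts and releases if it held the last reference. A userspace model of just that handoff, with illustrative names and printf standing in for the conversion and release work:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Model of the io_end refcounting; names mirror the kernel helpers. */
    struct io_end {
            atomic_int count;
            int unwritten;
    };

    static struct io_end *init_io_end(void)
    {
            struct io_end *io = calloc(1, sizeof(*io));
            atomic_store(&io->count, 1);    /* submitter's reference */
            return io;
    }

    static struct io_end *get_io_end(struct io_end *io)
    {
            atomic_fetch_add(&io->count, 1);        /* one per bio */
            return io;
    }

    static void put_io_end(struct io_end *io, const char *who)
    {
            if (atomic_fetch_sub(&io->count, 1) == 1) {     /* last ref */
                    printf("%s: last reference, %s\n", who,
                           io->unwritten ? "convert then release"
                                         : "release");
                    free(io);
            }
    }

    int main(void)
    {
            struct io_end *io = init_io_end();

            io->unwritten = 1;
            get_io_end(io);                 /* bio's reference at submit */
            put_io_end(io, "bio completion");
            put_io_end(io, "submitter");    /* last ref does conversion */
            return 0;
    }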
222 | static void ext4_end_bio(struct bio *bio, int error) | 311 | static void ext4_end_bio(struct bio *bio, int error) |
223 | { | 312 | { |
224 | ext4_io_end_t *io_end = bio->bi_private; | 313 | ext4_io_end_t *io_end = bio->bi_private; |
225 | struct inode *inode; | ||
226 | int i; | ||
227 | int blocksize; | ||
228 | sector_t bi_sector = bio->bi_sector; | 314 | sector_t bi_sector = bio->bi_sector; |
229 | 315 | ||
230 | BUG_ON(!io_end); | 316 | BUG_ON(!io_end); |
231 | inode = io_end->inode; | ||
232 | blocksize = 1 << inode->i_blkbits; | ||
233 | bio->bi_private = NULL; | ||
234 | bio->bi_end_io = NULL; | 317 | bio->bi_end_io = NULL; |
235 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 318 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
236 | error = 0; | 319 | error = 0; |
237 | for (i = 0; i < bio->bi_vcnt; i++) { | ||
238 | struct bio_vec *bvec = &bio->bi_io_vec[i]; | ||
239 | struct page *page = bvec->bv_page; | ||
240 | struct buffer_head *bh, *head; | ||
241 | unsigned bio_start = bvec->bv_offset; | ||
242 | unsigned bio_end = bio_start + bvec->bv_len; | ||
243 | unsigned under_io = 0; | ||
244 | unsigned long flags; | ||
245 | 320 | ||
246 | if (!page) | 321 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { |
247 | continue; | ||
248 | |||
249 | if (error) { | ||
250 | SetPageError(page); | ||
251 | set_bit(AS_EIO, &page->mapping->flags); | ||
252 | } | ||
253 | bh = head = page_buffers(page); | ||
254 | /* | 322 | /* |
255 | * We check all buffers in the page under BH_Uptodate_Lock | 323 | * Link bio into list hanging from io_end. We have to do it |
256 | * to avoid races with other end io clearing async_write flags | 324 | * atomically as bio completions can be racing against each |
325 | * other. | ||
257 | */ | 326 | */ |
258 | local_irq_save(flags); | 327 | bio->bi_private = xchg(&io_end->bio, bio); |
259 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | 328 | } else { |
260 | do { | 329 | ext4_finish_bio(bio); |
261 | if (bh_offset(bh) < bio_start || | 330 | bio_put(bio); |
262 | bh_offset(bh) + blocksize > bio_end) { | ||
263 | if (buffer_async_write(bh)) | ||
264 | under_io++; | ||
265 | continue; | ||
266 | } | ||
267 | clear_buffer_async_write(bh); | ||
268 | if (error) | ||
269 | buffer_io_error(bh); | ||
270 | } while ((bh = bh->b_this_page) != head); | ||
271 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
272 | local_irq_restore(flags); | ||
273 | if (!under_io) | ||
274 | end_page_writeback(page); | ||
275 | } | 331 | } |
276 | bio_put(bio); | ||
277 | 332 | ||
278 | if (error) { | 333 | if (error) { |
279 | io_end->flag |= EXT4_IO_END_ERROR; | 334 | struct inode *inode = io_end->inode; |
335 | |||
280 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | 336 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " |
281 | "(offset %llu size %ld starting block %llu)", | 337 | "(offset %llu size %ld starting block %llu)", |
282 | inode->i_ino, | 338 | inode->i_ino, |
@@ -285,13 +341,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
285 | (unsigned long long) | 341 | (unsigned long long) |
286 | bi_sector >> (inode->i_blkbits - 9)); | 342 | bi_sector >> (inode->i_blkbits - 9)); |
287 | } | 343 | } |
288 | 344 | ext4_put_io_end_defer(io_end); | |
289 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
290 | ext4_free_io_end(io_end); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | ext4_add_complete_io(io_end); | ||
295 | } | 345 | } |
296 | 346 | ||
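The bio->bi_private = xchg(&io_end->bio, bio) line above is a lock-free list push: each completing bio atomically installs itself as the new head and chains the previous head behind it. That is safe here only because the list is walked after all references are dropped, in ext4_release_io_end(). A minimal C11 sketch of the same idiom, with illustrative node and field names:

    #include <stdatomic.h>
    #include <stdio.h>

    struct node {
            struct node *next;
            int id;
    };

    /* Push n onto the list; safe against concurrent pushers. As with
     * io_end->bio, the list may only be traversed once pushing stops. */
    static void push(_Atomic(struct node *) *head, struct node *n)
    {
            n->next = atomic_exchange(head, n);
    }

    int main(void)
    {
            _Atomic(struct node *) head = NULL;
            struct node a = { .id = 1 }, b = { .id = 2 };

            push(&head, &a);
            push(&head, &b);
            for (struct node *n = head; n; n = n->next)
                    printf("node %d\n", n->id);     /* prints 2, then 1 */
            return 0;
    }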
297 | void ext4_io_submit(struct ext4_io_submit *io) | 347 | void ext4_io_submit(struct ext4_io_submit *io) |
@@ -305,43 +355,38 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
305 | bio_put(io->io_bio); | 355 | bio_put(io->io_bio); |
306 | } | 356 | } |
307 | io->io_bio = NULL; | 357 | io->io_bio = NULL; |
308 | io->io_op = 0; | 358 | } |
359 | |||
360 | void ext4_io_submit_init(struct ext4_io_submit *io, | ||
361 | struct writeback_control *wbc) | ||
362 | { | ||
363 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
364 | io->io_bio = NULL; | ||
309 | io->io_end = NULL; | 365 | io->io_end = NULL; |
310 | } | 366 | } |
311 | 367 | ||
312 | static int io_submit_init(struct ext4_io_submit *io, | 368 | static int io_submit_init_bio(struct ext4_io_submit *io, |
313 | struct inode *inode, | 369 | struct buffer_head *bh) |
314 | struct writeback_control *wbc, | ||
315 | struct buffer_head *bh) | ||
316 | { | 370 | { |
317 | ext4_io_end_t *io_end; | ||
318 | struct page *page = bh->b_page; | ||
319 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 371 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
320 | struct bio *bio; | 372 | struct bio *bio; |
321 | 373 | ||
322 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
323 | if (!io_end) | ||
324 | return -ENOMEM; | ||
325 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); | 374 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); |
375 | if (!bio) | ||
376 | return -ENOMEM; | ||
326 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 377 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
327 | bio->bi_bdev = bh->b_bdev; | 378 | bio->bi_bdev = bh->b_bdev; |
328 | bio->bi_private = io->io_end = io_end; | ||
329 | bio->bi_end_io = ext4_end_bio; | 379 | bio->bi_end_io = ext4_end_bio; |
330 | 380 | bio->bi_private = ext4_get_io_end(io->io_end); | |
331 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
332 | |||
333 | io->io_bio = bio; | 381 | io->io_bio = bio; |
334 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
335 | io->io_next_block = bh->b_blocknr; | 382 | io->io_next_block = bh->b_blocknr; |
336 | return 0; | 383 | return 0; |
337 | } | 384 | } |
338 | 385 | ||
339 | static int io_submit_add_bh(struct ext4_io_submit *io, | 386 | static int io_submit_add_bh(struct ext4_io_submit *io, |
340 | struct inode *inode, | 387 | struct inode *inode, |
341 | struct writeback_control *wbc, | ||
342 | struct buffer_head *bh) | 388 | struct buffer_head *bh) |
343 | { | 389 | { |
344 | ext4_io_end_t *io_end; | ||
345 | int ret; | 390 | int ret; |
346 | 391 | ||
347 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | 392 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { |
@@ -349,18 +394,14 @@ submit_and_retry: | |||
349 | ext4_io_submit(io); | 394 | ext4_io_submit(io); |
350 | } | 395 | } |
351 | if (io->io_bio == NULL) { | 396 | if (io->io_bio == NULL) { |
352 | ret = io_submit_init(io, inode, wbc, bh); | 397 | ret = io_submit_init_bio(io, bh); |
353 | if (ret) | 398 | if (ret) |
354 | return ret; | 399 | return ret; |
355 | } | 400 | } |
356 | io_end = io->io_end; | ||
357 | if (test_clear_buffer_uninit(bh)) | ||
358 | ext4_set_io_unwritten_flag(inode, io_end); | ||
359 | io->io_end->size += bh->b_size; | ||
360 | io->io_next_block++; | ||
361 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 401 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
362 | if (ret != bh->b_size) | 402 | if (ret != bh->b_size) |
363 | goto submit_and_retry; | 403 | goto submit_and_retry; |
404 | io->io_next_block++; | ||
364 | return 0; | 405 | return 0; |
365 | } | 406 | } |
366 | 407 | ||
@@ -432,7 +473,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
432 | do { | 473 | do { |
433 | if (!buffer_async_write(bh)) | 474 | if (!buffer_async_write(bh)) |
434 | continue; | 475 | continue; |
435 | ret = io_submit_add_bh(io, inode, wbc, bh); | 476 | ret = io_submit_add_bh(io, inode, bh); |
436 | if (ret) { | 477 | if (ret) { |
437 | /* | 478 | /* |
438 | * We only get here on ENOMEM. Not much else | 479 | * We only get here on ENOMEM. Not much else |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b27c96d01965..c5adbb318a90 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -79,12 +79,20 @@ static int verify_group_input(struct super_block *sb, | |||
79 | ext4_fsblk_t end = start + input->blocks_count; | 79 | ext4_fsblk_t end = start + input->blocks_count; |
80 | ext4_group_t group = input->group; | 80 | ext4_group_t group = input->group; |
81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; | 81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; |
82 | unsigned overhead = ext4_group_overhead_blocks(sb, group); | 82 | unsigned overhead; |
83 | ext4_fsblk_t metaend = start + overhead; | 83 | ext4_fsblk_t metaend; |
84 | struct buffer_head *bh = NULL; | 84 | struct buffer_head *bh = NULL; |
85 | ext4_grpblk_t free_blocks_count, offset; | 85 | ext4_grpblk_t free_blocks_count, offset; |
86 | int err = -EINVAL; | 86 | int err = -EINVAL; |
87 | 87 | ||
88 | if (group != sbi->s_groups_count) { | ||
89 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
90 | input->group, sbi->s_groups_count); | ||
91 | return -EINVAL; | ||
92 | } | ||
93 | |||
94 | overhead = ext4_group_overhead_blocks(sb, group); | ||
95 | metaend = start + overhead; | ||
88 | input->free_blocks_count = free_blocks_count = | 96 | input->free_blocks_count = free_blocks_count = |
89 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; | 97 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; |
90 | 98 | ||
@@ -96,10 +104,7 @@ static int verify_group_input(struct super_block *sb, | |||
96 | free_blocks_count, input->reserved_blocks); | 104 | free_blocks_count, input->reserved_blocks); |
97 | 105 | ||
98 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); | 106 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); |
99 | if (group != sbi->s_groups_count) | 107 | if (offset != 0) |
100 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
101 | input->group, sbi->s_groups_count); | ||
102 | else if (offset != 0) | ||
103 | ext4_warning(sb, "Last group not full"); | 108 | ext4_warning(sb, "Last group not full"); |
104 | else if (input->reserved_blocks > input->blocks_count / 5) | 109 | else if (input->reserved_blocks > input->blocks_count / 5) |
105 | ext4_warning(sb, "Reserved blocks too high (%u)", | 110 | ext4_warning(sb, "Reserved blocks too high (%u)", |
@@ -1551,11 +1556,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
1551 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 1556 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? |
1552 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1557 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
1553 | struct inode *inode = NULL; | 1558 | struct inode *inode = NULL; |
1554 | int gdb_off, gdb_num; | 1559 | int gdb_off; |
1555 | int err; | 1560 | int err; |
1556 | __u16 bg_flags = 0; | 1561 | __u16 bg_flags = 0; |
1557 | 1562 | ||
1558 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | ||
1559 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); | 1563 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); |
1560 | 1564 | ||
1561 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1565 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, |
@@ -1656,12 +1660,10 @@ errout: | |||
1656 | err = err2; | 1660 | err = err2; |
1657 | 1661 | ||
1658 | if (!err) { | 1662 | if (!err) { |
1659 | ext4_fsblk_t first_block; | ||
1660 | first_block = ext4_group_first_block_no(sb, 0); | ||
1661 | if (test_opt(sb, DEBUG)) | 1663 | if (test_opt(sb, DEBUG)) |
1662 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | 1664 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " |
1663 | "blocks\n", ext4_blocks_count(es)); | 1665 | "blocks\n", ext4_blocks_count(es)); |
1664 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, | 1666 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, |
1665 | (char *)es, sizeof(struct ext4_super_block), 0); | 1667 | (char *)es, sizeof(struct ext4_super_block), 0); |
1666 | } | 1668 | } |
1667 | return err; | 1669 | return err; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..85b3dd60169b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -69,6 +69,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
69 | static void ext4_clear_journal_err(struct super_block *sb, | 69 | static void ext4_clear_journal_err(struct super_block *sb, |
70 | struct ext4_super_block *es); | 70 | struct ext4_super_block *es); |
71 | static int ext4_sync_fs(struct super_block *sb, int wait); | 71 | static int ext4_sync_fs(struct super_block *sb, int wait); |
72 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); | ||
72 | static int ext4_remount(struct super_block *sb, int *flags, char *data); | 73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); |
73 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | 74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); |
74 | static int ext4_unfreeze(struct super_block *sb); | 75 | static int ext4_unfreeze(struct super_block *sb); |
@@ -398,6 +399,11 @@ static void ext4_handle_error(struct super_block *sb) | |||
398 | } | 399 | } |
399 | if (test_opt(sb, ERRORS_RO)) { | 400 | if (test_opt(sb, ERRORS_RO)) { |
400 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 401 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
402 | /* | ||
403 | * Make sure updated value of ->s_mount_flags will be visible | ||
404 | * before ->s_flags update | ||
405 | */ | ||
406 | smp_wmb(); | ||
401 | sb->s_flags |= MS_RDONLY; | 407 | sb->s_flags |= MS_RDONLY; |
402 | } | 408 | } |
403 | if (test_opt(sb, ERRORS_PANIC)) | 409 | if (test_opt(sb, ERRORS_PANIC)) |
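The new barrier enforces publish order: a CPU that sees MS_RDONLY in ->s_flags is guaranteed to also see the updated ->s_mount_flags (the same pairing is added to __ext4_abort() below). The reader side is not part of this hunk and must use a matching read barrier; the pattern in portable, runnable C11 form, with placeholder flags:

    #include <stdatomic.h>
    #include <stdbool.h>

    static _Atomic bool aborted, rdonly;    /* placeholder flags */

    /* Writer: publish 'aborted' strictly before 'rdonly' (~ smp_wmb()). */
    static void writer(void)
    {
            atomic_store_explicit(&aborted, true, memory_order_relaxed);
            atomic_thread_fence(memory_order_release);
            atomic_store_explicit(&rdonly, true, memory_order_relaxed);
    }

    /* Reader: seeing 'rdonly' guarantees 'aborted' is visible too. */
    static bool reader(void)
    {
            if (atomic_load_explicit(&rdonly, memory_order_relaxed)) {
                    atomic_thread_fence(memory_order_acquire); /* ~ smp_rmb() */
                    return atomic_load_explicit(&aborted,
                                                memory_order_relaxed);
            }
            return false;
    }

    /* Single-threaded demo; the ordering matters once writer and reader
     * run on different CPUs. */
    int main(void)
    {
            writer();
            return reader() ? 0 : 1;
    }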
@@ -422,9 +428,9 @@ void __ext4_error(struct super_block *sb, const char *function, | |||
422 | ext4_handle_error(sb); | 428 | ext4_handle_error(sb); |
423 | } | 429 | } |
424 | 430 | ||
425 | void ext4_error_inode(struct inode *inode, const char *function, | 431 | void __ext4_error_inode(struct inode *inode, const char *function, |
426 | unsigned int line, ext4_fsblk_t block, | 432 | unsigned int line, ext4_fsblk_t block, |
427 | const char *fmt, ...) | 433 | const char *fmt, ...) |
428 | { | 434 | { |
429 | va_list args; | 435 | va_list args; |
430 | struct va_format vaf; | 436 | struct va_format vaf; |
@@ -451,9 +457,9 @@ void ext4_error_inode(struct inode *inode, const char *function, | |||
451 | ext4_handle_error(inode->i_sb); | 457 | ext4_handle_error(inode->i_sb); |
452 | } | 458 | } |
453 | 459 | ||
454 | void ext4_error_file(struct file *file, const char *function, | 460 | void __ext4_error_file(struct file *file, const char *function, |
455 | unsigned int line, ext4_fsblk_t block, | 461 | unsigned int line, ext4_fsblk_t block, |
456 | const char *fmt, ...) | 462 | const char *fmt, ...) |
457 | { | 463 | { |
458 | va_list args; | 464 | va_list args; |
459 | struct va_format vaf; | 465 | struct va_format vaf; |
@@ -570,8 +576,13 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
570 | 576 | ||
571 | if ((sb->s_flags & MS_RDONLY) == 0) { | 577 | if ((sb->s_flags & MS_RDONLY) == 0) { |
572 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 578 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
573 | sb->s_flags |= MS_RDONLY; | ||
574 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | 579 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
580 | /* | ||
581 | * Make sure updated value of ->s_mount_flags will be visible | ||
582 | * before ->s_flags update | ||
583 | */ | ||
584 | smp_wmb(); | ||
585 | sb->s_flags |= MS_RDONLY; | ||
575 | if (EXT4_SB(sb)->s_journal) | 586 | if (EXT4_SB(sb)->s_journal) |
576 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 587 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
577 | save_error_info(sb, function, line); | 588 | save_error_info(sb, function, line); |
@@ -580,7 +591,8 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
580 | panic("EXT4-fs panic from previous error\n"); | 591 | panic("EXT4-fs panic from previous error\n"); |
581 | } | 592 | } |
582 | 593 | ||
583 | void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) | 594 | void __ext4_msg(struct super_block *sb, |
595 | const char *prefix, const char *fmt, ...) | ||
584 | { | 596 | { |
585 | struct va_format vaf; | 597 | struct va_format vaf; |
586 | va_list args; | 598 | va_list args; |
@@ -750,8 +762,10 @@ static void ext4_put_super(struct super_block *sb) | |||
750 | ext4_unregister_li_request(sb); | 762 | ext4_unregister_li_request(sb); |
751 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 763 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
752 | 764 | ||
753 | flush_workqueue(sbi->dio_unwritten_wq); | 765 | flush_workqueue(sbi->unrsv_conversion_wq); |
754 | destroy_workqueue(sbi->dio_unwritten_wq); | 766 | flush_workqueue(sbi->rsv_conversion_wq); |
767 | destroy_workqueue(sbi->unrsv_conversion_wq); | ||
768 | destroy_workqueue(sbi->rsv_conversion_wq); | ||
755 | 769 | ||
756 | if (sbi->s_journal) { | 770 | if (sbi->s_journal) { |
757 | err = jbd2_journal_destroy(sbi->s_journal); | 771 | err = jbd2_journal_destroy(sbi->s_journal); |
@@ -760,7 +774,7 @@ static void ext4_put_super(struct super_block *sb) | |||
760 | ext4_abort(sb, "Couldn't clean up the journal"); | 774 | ext4_abort(sb, "Couldn't clean up the journal"); |
761 | } | 775 | } |
762 | 776 | ||
763 | ext4_es_unregister_shrinker(sb); | 777 | ext4_es_unregister_shrinker(sbi); |
764 | del_timer(&sbi->s_err_report); | 778 | del_timer(&sbi->s_err_report); |
765 | ext4_release_system_zone(sb); | 779 | ext4_release_system_zone(sb); |
766 | ext4_mb_release(sb); | 780 | ext4_mb_release(sb); |
@@ -849,6 +863,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
849 | rwlock_init(&ei->i_es_lock); | 863 | rwlock_init(&ei->i_es_lock); |
850 | INIT_LIST_HEAD(&ei->i_es_lru); | 864 | INIT_LIST_HEAD(&ei->i_es_lru); |
851 | ei->i_es_lru_nr = 0; | 865 | ei->i_es_lru_nr = 0; |
866 | ei->i_touch_when = 0; | ||
852 | ei->i_reserved_data_blocks = 0; | 867 | ei->i_reserved_data_blocks = 0; |
853 | ei->i_reserved_meta_blocks = 0; | 868 | ei->i_reserved_meta_blocks = 0; |
854 | ei->i_allocated_meta_blocks = 0; | 869 | ei->i_allocated_meta_blocks = 0; |
@@ -859,13 +874,15 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
859 | ei->i_reserved_quota = 0; | 874 | ei->i_reserved_quota = 0; |
860 | #endif | 875 | #endif |
861 | ei->jinode = NULL; | 876 | ei->jinode = NULL; |
862 | INIT_LIST_HEAD(&ei->i_completed_io_list); | 877 | INIT_LIST_HEAD(&ei->i_rsv_conversion_list); |
878 | INIT_LIST_HEAD(&ei->i_unrsv_conversion_list); | ||
863 | spin_lock_init(&ei->i_completed_io_lock); | 879 | spin_lock_init(&ei->i_completed_io_lock); |
864 | ei->i_sync_tid = 0; | 880 | ei->i_sync_tid = 0; |
865 | ei->i_datasync_tid = 0; | 881 | ei->i_datasync_tid = 0; |
866 | atomic_set(&ei->i_ioend_count, 0); | 882 | atomic_set(&ei->i_ioend_count, 0); |
867 | atomic_set(&ei->i_unwritten, 0); | 883 | atomic_set(&ei->i_unwritten, 0); |
868 | INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work); | 884 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); |
885 | INIT_WORK(&ei->i_unrsv_conversion_work, ext4_end_io_unrsv_work); | ||
869 | 886 | ||
870 | return &ei->vfs_inode; | 887 | return &ei->vfs_inode; |
871 | } | 888 | } |
@@ -1093,6 +1110,7 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1093 | .dirty_inode = ext4_dirty_inode, | 1110 | .dirty_inode = ext4_dirty_inode, |
1094 | .drop_inode = ext4_drop_inode, | 1111 | .drop_inode = ext4_drop_inode, |
1095 | .evict_inode = ext4_evict_inode, | 1112 | .evict_inode = ext4_evict_inode, |
1113 | .sync_fs = ext4_sync_fs_nojournal, | ||
1096 | .put_super = ext4_put_super, | 1114 | .put_super = ext4_put_super, |
1097 | .statfs = ext4_statfs, | 1115 | .statfs = ext4_statfs, |
1098 | .remount_fs = ext4_remount, | 1116 | .remount_fs = ext4_remount, |
@@ -1908,7 +1926,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1908 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1926 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1909 | struct ext4_group_desc *gdp = NULL; | 1927 | struct ext4_group_desc *gdp = NULL; |
1910 | ext4_group_t flex_group; | 1928 | ext4_group_t flex_group; |
1911 | unsigned int groups_per_flex = 0; | ||
1912 | int i, err; | 1929 | int i, err; |
1913 | 1930 | ||
1914 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1931 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
@@ -1916,7 +1933,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1916 | sbi->s_log_groups_per_flex = 0; | 1933 | sbi->s_log_groups_per_flex = 0; |
1917 | return 1; | 1934 | return 1; |
1918 | } | 1935 | } |
1919 | groups_per_flex = 1U << sbi->s_log_groups_per_flex; | ||
1920 | 1936 | ||
1921 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); | 1937 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); |
1922 | if (err) | 1938 | if (err) |
@@ -2164,19 +2180,22 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2164 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 2180 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
2165 | dquot_initialize(inode); | 2181 | dquot_initialize(inode); |
2166 | if (inode->i_nlink) { | 2182 | if (inode->i_nlink) { |
2167 | ext4_msg(sb, KERN_DEBUG, | 2183 | if (test_opt(sb, DEBUG)) |
2168 | "%s: truncating inode %lu to %lld bytes", | 2184 | ext4_msg(sb, KERN_DEBUG, |
2169 | __func__, inode->i_ino, inode->i_size); | 2185 | "%s: truncating inode %lu to %lld bytes", |
2186 | __func__, inode->i_ino, inode->i_size); | ||
2170 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 2187 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
2171 | inode->i_ino, inode->i_size); | 2188 | inode->i_ino, inode->i_size); |
2172 | mutex_lock(&inode->i_mutex); | 2189 | mutex_lock(&inode->i_mutex); |
2190 | truncate_inode_pages(inode->i_mapping, inode->i_size); | ||
2173 | ext4_truncate(inode); | 2191 | ext4_truncate(inode); |
2174 | mutex_unlock(&inode->i_mutex); | 2192 | mutex_unlock(&inode->i_mutex); |
2175 | nr_truncates++; | 2193 | nr_truncates++; |
2176 | } else { | 2194 | } else { |
2177 | ext4_msg(sb, KERN_DEBUG, | 2195 | if (test_opt(sb, DEBUG)) |
2178 | "%s: deleting unreferenced inode %lu", | 2196 | ext4_msg(sb, KERN_DEBUG, |
2179 | __func__, inode->i_ino); | 2197 | "%s: deleting unreferenced inode %lu", |
2198 | __func__, inode->i_ino); | ||
2180 | jbd_debug(2, "deleting unreferenced inode %lu\n", | 2199 | jbd_debug(2, "deleting unreferenced inode %lu\n", |
2181 | inode->i_ino); | 2200 | inode->i_ino); |
2182 | nr_orphans++; | 2201 | nr_orphans++; |
@@ -2377,7 +2396,10 @@ struct ext4_attr { | |||
2377 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); | 2396 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); |
2378 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, | 2397 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, |
2379 | const char *, size_t); | 2398 | const char *, size_t); |
2380 | int offset; | 2399 | union { |
2400 | int offset; | ||
2401 | int deprecated_val; | ||
2402 | } u; | ||
2381 | }; | 2403 | }; |
2382 | 2404 | ||
2383 | static int parse_strtoull(const char *buf, | 2405 | static int parse_strtoull(const char *buf, |
@@ -2446,7 +2468,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2446 | static ssize_t sbi_ui_show(struct ext4_attr *a, | 2468 | static ssize_t sbi_ui_show(struct ext4_attr *a, |
2447 | struct ext4_sb_info *sbi, char *buf) | 2469 | struct ext4_sb_info *sbi, char *buf) |
2448 | { | 2470 | { |
2449 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2471 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2450 | 2472 | ||
2451 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | 2473 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); |
2452 | } | 2474 | } |
@@ -2455,7 +2477,7 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, | |||
2455 | struct ext4_sb_info *sbi, | 2477 | struct ext4_sb_info *sbi, |
2456 | const char *buf, size_t count) | 2478 | const char *buf, size_t count) |
2457 | { | 2479 | { |
2458 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2480 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2459 | unsigned long t; | 2481 | unsigned long t; |
2460 | int ret; | 2482 | int ret; |
2461 | 2483 | ||
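The union added above lets one descriptor field do double duty: a byte offset into ext4_sb_info for live tunables, or a canned value for deprecated ones, while sbi_ui_show()/sbi_ui_store() locate the backing field with plain offsetof arithmetic. A self-contained userspace sketch of the same technique (demo_info and demo_attr are invented stand-ins, not kernel types):

	#include <stddef.h>
	#include <stdio.h>

	struct demo_info {			/* stands in for ext4_sb_info */
		unsigned int stream_req;
		unsigned int group_prealloc;
	};

	struct demo_attr {			/* stands in for ext4_attr */
		const char *name;
		union {
			int offset;		/* offsetof() into demo_info */
			int deprecated_val;	/* fixed reply, no backing field */
		} u;
	};

	static unsigned int demo_show(const struct demo_attr *a,
				      const struct demo_info *info)
	{
		/* same arithmetic as sbi_ui_show: base pointer plus byte offset */
		return *(const unsigned int *)((const char *)info + a->u.offset);
	}

	int main(void)
	{
		struct demo_info info = { .stream_req = 16, .group_prealloc = 512 };
		struct demo_attr a = {
			.name = "group_prealloc",
			.u = { .offset = offsetof(struct demo_info, group_prealloc) },
		};

		printf("%s = %u\n", a.name, demo_show(&a, &info));	/* prints 512 */
		return 0;
	}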
@@ -2504,12 +2526,20 @@ static ssize_t trigger_test_error(struct ext4_attr *a, | |||
2504 | return count; | 2526 | return count; |
2505 | } | 2527 | } |
2506 | 2528 | ||
2529 | static ssize_t sbi_deprecated_show(struct ext4_attr *a, | ||
2530 | struct ext4_sb_info *sbi, char *buf) | ||
2531 | { | ||
2532 | return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); | ||
2533 | } | ||
2534 | |||
2507 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ | 2535 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ |
2508 | static struct ext4_attr ext4_attr_##_name = { \ | 2536 | static struct ext4_attr ext4_attr_##_name = { \ |
2509 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 2537 | .attr = {.name = __stringify(_name), .mode = _mode }, \ |
2510 | .show = _show, \ | 2538 | .show = _show, \ |
2511 | .store = _store, \ | 2539 | .store = _store, \ |
2512 | .offset = offsetof(struct ext4_sb_info, _elname), \ | 2540 | .u = { \ |
2541 | .offset = offsetof(struct ext4_sb_info, _elname),\ | ||
2542 | }, \ | ||
2513 | } | 2543 | } |
2514 | #define EXT4_ATTR(name, mode, show, store) \ | 2544 | #define EXT4_ATTR(name, mode, show, store) \ |
2515 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2545 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
@@ -2520,6 +2550,14 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | |||
2520 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2550 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
2521 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) | 2551 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) |
2522 | #define ATTR_LIST(name) &ext4_attr_##name.attr | 2552 | #define ATTR_LIST(name) &ext4_attr_##name.attr |
2553 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ | ||
2554 | static struct ext4_attr ext4_attr_##_name = { \ | ||
2555 | .attr = {.name = __stringify(_name), .mode = 0444 }, \ | ||
2556 | .show = sbi_deprecated_show, \ | ||
2557 | .u = { \ | ||
2558 | .deprecated_val = _val, \ | ||
2559 | }, \ | ||
2560 | } | ||
2523 | 2561 | ||
2524 | EXT4_RO_ATTR(delayed_allocation_blocks); | 2562 | EXT4_RO_ATTR(delayed_allocation_blocks); |
2525 | EXT4_RO_ATTR(session_write_kbytes); | 2563 | EXT4_RO_ATTR(session_write_kbytes); |
@@ -2534,7 +2572,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); | |||
2534 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | 2572 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); |
2535 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2573 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2536 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2574 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2537 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2575 | EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); |
2538 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | 2576 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); |
2539 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2577 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
2540 | 2578 | ||
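To make the macro changes concrete, here is what two of the declarations above expand to (hand-expanded from the macros in this hunk, whitespace normalized). The still-tunable extent_max_zeroout_kb keeps a real field offset, while the deprecated max_writeback_mb_bump becomes a read-only attribute that always reports 128 and touches no superblock state:

	static struct ext4_attr ext4_attr_extent_max_zeroout_kb = {
		.attr = { .name = "extent_max_zeroout_kb", .mode = 0644 },
		.show = sbi_ui_show,
		.store = sbi_ui_store,
		.u = {
			.offset = offsetof(struct ext4_sb_info,
					   s_extent_max_zeroout_kb),
		},
	};

	static struct ext4_attr ext4_attr_max_writeback_mb_bump = {
		.attr = { .name = "max_writeback_mb_bump", .mode = 0444 },
		.show = sbi_deprecated_show,
		.u = {
			.deprecated_val = 128,
		},
	};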
@@ -3763,7 +3801,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3763 | sbi->s_err_report.data = (unsigned long) sb; | 3801 | sbi->s_err_report.data = (unsigned long) sb; |
3764 | 3802 | ||
3765 | /* Register extent status tree shrinker */ | 3803 | /* Register extent status tree shrinker */ |
3766 | ext4_es_register_shrinker(sb); | 3804 | ext4_es_register_shrinker(sbi); |
3767 | 3805 | ||
3768 | err = percpu_counter_init(&sbi->s_freeclusters_counter, | 3806 | err = percpu_counter_init(&sbi->s_freeclusters_counter, |
3769 | ext4_count_free_clusters(sb)); | 3807 | ext4_count_free_clusters(sb)); |
@@ -3787,7 +3825,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3787 | } | 3825 | } |
3788 | 3826 | ||
3789 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3827 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3790 | sbi->s_max_writeback_mb_bump = 128; | ||
3791 | sbi->s_extent_max_zeroout_kb = 32; | 3828 | sbi->s_extent_max_zeroout_kb = 32; |
3792 | 3829 | ||
3793 | /* | 3830 | /* |
@@ -3915,12 +3952,20 @@ no_journal: | |||
3915 | * The maximum number of concurrent work items can be high and | 3952 | * The maximum number of concurrent work items can be high and |
3916 | * concurrency isn't really necessary. Limit it to 1. | 3953 | * concurrency isn't really necessary. Limit it to 1. |
3917 | */ | 3954 | */ |
3918 | EXT4_SB(sb)->dio_unwritten_wq = | 3955 | EXT4_SB(sb)->rsv_conversion_wq = |
3919 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | 3956 | alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); |
3920 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3957 | if (!EXT4_SB(sb)->rsv_conversion_wq) { |
3921 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3958 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); |
3922 | ret = -ENOMEM; | 3959 | ret = -ENOMEM; |
3923 | goto failed_mount_wq; | 3960 | goto failed_mount4; |
3961 | } | ||
3962 | |||
3963 | EXT4_SB(sb)->unrsv_conversion_wq = | ||
3964 | alloc_workqueue("ext4-unrsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | ||
3965 | if (!EXT4_SB(sb)->unrsv_conversion_wq) { | ||
3966 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); | ||
3967 | ret = -ENOMEM; | ||
3968 | goto failed_mount4; | ||
3924 | } | 3969 | } |
3925 | 3970 | ||
3926 | /* | 3971 | /* |
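Both conversion queues are created with the same flags; a minimal sketch of the allocation pattern, using an invented queue name:

	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *demo_wq;	/* hypothetical */

	static int demo_create_wq(void)
	{
		/*
		 * WQ_MEM_RECLAIM guarantees a rescuer thread so writeback
		 * completion cannot deadlock under memory pressure; WQ_UNBOUND
		 * skips per-CPU queueing; max_active = 1 serializes the work
		 * items, matching the "limit it to 1" comment above.
		 */
		demo_wq = alloc_workqueue("demo-conversion",
					  WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
		if (!demo_wq)
			return -ENOMEM;
		return 0;
	}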
@@ -4074,14 +4119,17 @@ failed_mount4a: | |||
4074 | sb->s_root = NULL; | 4119 | sb->s_root = NULL; |
4075 | failed_mount4: | 4120 | failed_mount4: |
4076 | ext4_msg(sb, KERN_ERR, "mount failed"); | 4121 | ext4_msg(sb, KERN_ERR, "mount failed"); |
4077 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 4122 | if (EXT4_SB(sb)->rsv_conversion_wq) |
4123 | destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
4124 | if (EXT4_SB(sb)->unrsv_conversion_wq) | ||
4125 | destroy_workqueue(EXT4_SB(sb)->unrsv_conversion_wq); | ||
4078 | failed_mount_wq: | 4126 | failed_mount_wq: |
4079 | if (sbi->s_journal) { | 4127 | if (sbi->s_journal) { |
4080 | jbd2_journal_destroy(sbi->s_journal); | 4128 | jbd2_journal_destroy(sbi->s_journal); |
4081 | sbi->s_journal = NULL; | 4129 | sbi->s_journal = NULL; |
4082 | } | 4130 | } |
4083 | failed_mount3: | 4131 | failed_mount3: |
4084 | ext4_es_unregister_shrinker(sb); | 4132 | ext4_es_unregister_shrinker(sbi); |
4085 | del_timer(&sbi->s_err_report); | 4133 | del_timer(&sbi->s_err_report); |
4086 | if (sbi->s_flex_groups) | 4134 | if (sbi->s_flex_groups) |
4087 | ext4_kvfree(sbi->s_flex_groups); | 4135 | ext4_kvfree(sbi->s_flex_groups); |
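Because either workqueue allocation can now fail after the other has succeeded, the error path checks each pointer before destroying it. The same NULL-guarded, single-label unwind as a generic sketch with invented names:

	#include <linux/errno.h>
	#include <linux/slab.h>

	struct demo_ctx {		/* hypothetical */
		void *a;
		void *b;
	};

	static int demo_setup(struct demo_ctx *d)
	{
		d->a = kzalloc(64, GFP_KERNEL);
		if (!d->a)
			goto fail;
		d->b = kzalloc(64, GFP_KERNEL);
		if (!d->b)
			goto fail;	/* one label covers every partial state */
		return 0;
	fail:
		kfree(d->b);		/* kfree(NULL) is a no-op; the wq path uses
					 * explicit NULL checks for the same effect */
		kfree(d->a);
		return -ENOMEM;
	}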
@@ -4517,19 +4565,52 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
4517 | { | 4565 | { |
4518 | int ret = 0; | 4566 | int ret = 0; |
4519 | tid_t target; | 4567 | tid_t target; |
4568 | bool needs_barrier = false; | ||
4520 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4569 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4521 | 4570 | ||
4522 | trace_ext4_sync_fs(sb, wait); | 4571 | trace_ext4_sync_fs(sb, wait); |
4523 | flush_workqueue(sbi->dio_unwritten_wq); | 4572 | flush_workqueue(sbi->rsv_conversion_wq); |
4573 | flush_workqueue(sbi->unrsv_conversion_wq); | ||
4524 | /* | 4574 | /* |
4525 | * Write back quota in the non-journalled quota case - journalled quota has | 4575 | * Write back quota in the non-journalled quota case - journalled quota has |
4526 | * no dirty dquots | 4576 | * no dirty dquots |
4527 | */ | 4577 | */ |
4528 | dquot_writeback_dquots(sb, -1); | 4578 | dquot_writeback_dquots(sb, -1); |
4579 | /* | ||
4580 | * Data writeback is possible w/o journal transaction, so barrier must | ||
4581 | * being sent at the end of the function. But we can skip it if | ||
4582 | * transaction_commit will do it for us. | ||
4583 | */ | ||
4584 | target = jbd2_get_latest_transaction(sbi->s_journal); | ||
4585 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && | ||
4586 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | ||
4587 | needs_barrier = true; | ||
4588 | |||
4529 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | 4589 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { |
4530 | if (wait) | 4590 | if (wait) |
4531 | jbd2_log_wait_commit(sbi->s_journal, target); | 4591 | ret = jbd2_log_wait_commit(sbi->s_journal, target); |
4592 | } | ||
4593 | if (needs_barrier) { | ||
4594 | int err; | ||
4595 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
4596 | if (!ret) | ||
4597 | ret = err; | ||
4532 | } | 4598 | } |
4599 | |||
4600 | return ret; | ||
4601 | } | ||
4602 | |||
4603 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) | ||
4604 | { | ||
4605 | int ret = 0; | ||
4606 | |||
4607 | trace_ext4_sync_fs(sb, wait); | ||
4608 | flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
4609 | flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq); | ||
4610 | dquot_writeback_dquots(sb, -1); | ||
4611 | if (wait && test_opt(sb, BARRIER)) | ||
4612 | ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
4613 | |||
4533 | return ret; | 4614 | return ret; |
4534 | } | 4615 | } |
4535 | 4616 | ||
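Taken together, the two sync paths reduce to one question: will anything else flush the disk cache before ext4_sync_fs() returns? With a journal, the pending commit may carry the flush (the jbd2_trans_will_send_data_barrier() check above); without one, only the explicit blkdev_issue_flush() can. A condensed restatement of that decision, as a sketch rather than the literal kernel logic:

	#include <stdbool.h>

	/*
	 * Condensed decision logic, not the literal kernel code: when must
	 * sync_fs issue its own disk-cache flush?
	 */
	static bool sync_needs_own_flush(bool wait, bool has_journal,
					 bool barriers_enabled,
					 bool commit_will_flush)
	{
		if (!wait || !barriers_enabled)
			return false;	/* async sync, or barriers turned off */
		if (has_journal)
			return !commit_will_flush;	/* commit's flush covers us */
		return true;	/* nojournal: nothing else will flush the cache */
	}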