diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 17:36:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-07 17:36:39 -0400 |
commit | 6432f2128414edbea5fd4f6c4fa4c28d0e1c6151 (patch) | |
tree | d3c63c5f2f043ce52d98d8dfd3c9c0a7bc76ed95 /fs/ext4 | |
parent | 1b033447bf847ba49c3816c564c9191c97456b36 (diff) | |
parent | c278531d39f3158bfee93dc67da0b77e09776de2 (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"The big new feature added this time is supporting online resizing
using the meta_bg feature. This allows us to resize file systems
which are greater than 16TB. In addition, the speed of online
resizing has been improved in general.
We also fix a number of races, some of which could lead to deadlocks,
in ext4's Asynchronous I/O and online defrag support, thanks to good
work by Dmitry Monakhov.
There are also a large number of more minor bug fixes and cleanups
from a number of other ext4 contributors, quite a few of whom have
submitted fixes for the first time."
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (69 commits)
ext4: fix ext4_flush_completed_IO wait semantics
ext4: fix mtime update in nodelalloc mode
ext4: fix ext_remove_space for punch_hole case
ext4: punch_hole should wait for DIO writers
ext4: serialize truncate with owerwrite DIO workers
ext4: endless truncate due to nonlocked dio readers
ext4: serialize unlocked dio reads with truncate
ext4: serialize dio nonlocked reads with defrag workers
ext4: completed_io locking cleanup
ext4: fix unwritten counter leakage
ext4: give i_aiodio_unwritten a more appropriate name
ext4: ext4_inode_info diet
ext4: convert to use leXX_add_cpu()
ext4: ext4_bread usage audit
fs: reserve fallocate flag codepoint
ext4: remove redundant offset check in mext_check_arguments()
ext4: don't clear orphan list on ro mount with errors
jbd2: fix assertion failure in commit code due to lacking transaction credits
ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails
ext4: enable FITRIM ioctl on bigalloc file system
...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 49 | ||||
-rw-r--r-- | fs/ext4/extents.c | 258 | ||||
-rw-r--r-- | fs/ext4/file.c | 6 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 92 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 9 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 18 | ||||
-rw-r--r-- | fs/ext4/inode.c | 83 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 22 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 129 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 5 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 520 | ||||
-rw-r--r-- | fs/ext4/namei.c | 105 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 176 | ||||
-rw-r--r-- | fs/ext4/resize.c | 432 | ||||
-rw-r--r-- | fs/ext4/super.c | 92 |
15 files changed, 1260 insertions, 736 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c3411d4ce2da..3ab2539b7b2e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -186,7 +186,6 @@ struct mpage_da_data { | |||
186 | #define EXT4_IO_END_ERROR 0x0002 | 186 | #define EXT4_IO_END_ERROR 0x0002 |
187 | #define EXT4_IO_END_QUEUED 0x0004 | 187 | #define EXT4_IO_END_QUEUED 0x0004 |
188 | #define EXT4_IO_END_DIRECT 0x0008 | 188 | #define EXT4_IO_END_DIRECT 0x0008 |
189 | #define EXT4_IO_END_IN_FSYNC 0x0010 | ||
190 | 189 | ||
191 | struct ext4_io_page { | 190 | struct ext4_io_page { |
192 | struct page *p_page; | 191 | struct page *p_page; |
@@ -912,9 +911,7 @@ struct ext4_inode_info { | |||
912 | struct list_head i_completed_io_list; | 911 | struct list_head i_completed_io_list; |
913 | spinlock_t i_completed_io_lock; | 912 | spinlock_t i_completed_io_lock; |
914 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | 913 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ |
915 | /* current io_end structure for async DIO write*/ | 914 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ |
916 | ext4_io_end_t *cur_aio_dio; | ||
917 | atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ | ||
918 | 915 | ||
919 | spinlock_t i_block_reservation_lock; | 916 | spinlock_t i_block_reservation_lock; |
920 | 917 | ||
@@ -1233,6 +1230,7 @@ struct ext4_sb_info { | |||
1233 | spinlock_t s_md_lock; | 1230 | spinlock_t s_md_lock; |
1234 | unsigned short *s_mb_offsets; | 1231 | unsigned short *s_mb_offsets; |
1235 | unsigned int *s_mb_maxs; | 1232 | unsigned int *s_mb_maxs; |
1233 | unsigned int s_group_info_size; | ||
1236 | 1234 | ||
1237 | /* tunables */ | 1235 | /* tunables */ |
1238 | unsigned long s_stripe; | 1236 | unsigned long s_stripe; |
@@ -1243,6 +1241,7 @@ struct ext4_sb_info { | |||
1243 | unsigned int s_mb_order2_reqs; | 1241 | unsigned int s_mb_order2_reqs; |
1244 | unsigned int s_mb_group_prealloc; | 1242 | unsigned int s_mb_group_prealloc; |
1245 | unsigned int s_max_writeback_mb_bump; | 1243 | unsigned int s_max_writeback_mb_bump; |
1244 | unsigned int s_max_dir_size_kb; | ||
1246 | /* where last allocation was done - for stream allocation */ | 1245 | /* where last allocation was done - for stream allocation */ |
1247 | unsigned long s_mb_last_group; | 1246 | unsigned long s_mb_last_group; |
1248 | unsigned long s_mb_last_start; | 1247 | unsigned long s_mb_last_start; |
@@ -1270,8 +1269,12 @@ struct ext4_sb_info { | |||
1270 | unsigned long s_sectors_written_start; | 1269 | unsigned long s_sectors_written_start; |
1271 | u64 s_kbytes_written; | 1270 | u64 s_kbytes_written; |
1272 | 1271 | ||
1272 | /* the size of zero-out chunk */ | ||
1273 | unsigned int s_extent_max_zeroout_kb; | ||
1274 | |||
1273 | unsigned int s_log_groups_per_flex; | 1275 | unsigned int s_log_groups_per_flex; |
1274 | struct flex_groups *s_flex_groups; | 1276 | struct flex_groups *s_flex_groups; |
1277 | ext4_group_t s_flex_groups_allocated; | ||
1275 | 1278 | ||
1276 | /* workqueue for dio unwritten */ | 1279 | /* workqueue for dio unwritten */ |
1277 | struct workqueue_struct *dio_unwritten_wq; | 1280 | struct workqueue_struct *dio_unwritten_wq; |
@@ -1328,10 +1331,20 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, | |||
1328 | { | 1331 | { |
1329 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 1332 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
1330 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 1333 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
1331 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | 1334 | atomic_inc(&EXT4_I(inode)->i_unwritten); |
1332 | } | 1335 | } |
1333 | } | 1336 | } |
1334 | 1337 | ||
1338 | static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) | ||
1339 | { | ||
1340 | return inode->i_private; | ||
1341 | } | ||
1342 | |||
1343 | static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io) | ||
1344 | { | ||
1345 | inode->i_private = io; | ||
1346 | } | ||
1347 | |||
1335 | /* | 1348 | /* |
1336 | * Inode dynamic state flags | 1349 | * Inode dynamic state flags |
1337 | */ | 1350 | */ |
@@ -1345,6 +1358,8 @@ enum { | |||
1345 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | 1358 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
1346 | EXT4_STATE_NEWENTRY, /* File just added to dir */ | 1359 | EXT4_STATE_NEWENTRY, /* File just added to dir */ |
1347 | EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ | 1360 | EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ |
1361 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read | ||
1362 | nolocking */ | ||
1348 | }; | 1363 | }; |
1349 | 1364 | ||
1350 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ | 1365 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ |
@@ -1932,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p); | |||
1932 | 1947 | ||
1933 | /* fsync.c */ | 1948 | /* fsync.c */ |
1934 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); | 1949 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); |
1935 | extern int ext4_flush_completed_IO(struct inode *); | 1950 | extern int ext4_flush_unwritten_io(struct inode *); |
1936 | 1951 | ||
1937 | /* hash.c */ | 1952 | /* hash.c */ |
1938 | extern int ext4fs_dirhash(const char *name, int len, struct | 1953 | extern int ext4fs_dirhash(const char *name, int len, struct |
@@ -1966,6 +1981,8 @@ extern void ext4_exit_mballoc(void); | |||
1966 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | 1981 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
1967 | struct buffer_head *bh, ext4_fsblk_t block, | 1982 | struct buffer_head *bh, ext4_fsblk_t block, |
1968 | unsigned long count, int flags); | 1983 | unsigned long count, int flags); |
1984 | extern int ext4_mb_alloc_groupinfo(struct super_block *sb, | ||
1985 | ext4_group_t ngroups); | ||
1969 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1986 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1970 | ext4_group_t i, struct ext4_group_desc *desc); | 1987 | ext4_group_t i, struct ext4_group_desc *desc); |
1971 | extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | 1988 | extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, |
@@ -2051,6 +2068,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb, | |||
2051 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 2068 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
2052 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); | 2069 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); |
2053 | extern void ext4_kvfree(void *ptr); | 2070 | extern void ext4_kvfree(void *ptr); |
2071 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, | ||
2072 | ext4_group_t ngroup); | ||
2054 | extern __printf(4, 5) | 2073 | extern __printf(4, 5) |
2055 | void __ext4_error(struct super_block *, const char *, unsigned int, | 2074 | void __ext4_error(struct super_block *, const char *, unsigned int, |
2056 | const char *, ...); | 2075 | const char *, ...); |
@@ -2352,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations; | |||
2352 | extern const struct inode_operations ext4_file_inode_operations; | 2371 | extern const struct inode_operations ext4_file_inode_operations; |
2353 | extern const struct file_operations ext4_file_operations; | 2372 | extern const struct file_operations ext4_file_operations; |
2354 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | 2373 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); |
2374 | extern void ext4_unwritten_wait(struct inode *inode); | ||
2355 | 2375 | ||
2356 | /* namei.c */ | 2376 | /* namei.c */ |
2357 | extern const struct inode_operations ext4_dir_inode_operations; | 2377 | extern const struct inode_operations ext4_dir_inode_operations; |
@@ -2400,11 +2420,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2400 | 2420 | ||
2401 | /* page-io.c */ | 2421 | /* page-io.c */ |
2402 | extern int __init ext4_init_pageio(void); | 2422 | extern int __init ext4_init_pageio(void); |
2423 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); | ||
2403 | extern void ext4_exit_pageio(void); | 2424 | extern void ext4_exit_pageio(void); |
2404 | extern void ext4_ioend_wait(struct inode *); | 2425 | extern void ext4_ioend_wait(struct inode *); |
2405 | extern void ext4_free_io_end(ext4_io_end_t *io); | 2426 | extern void ext4_free_io_end(ext4_io_end_t *io); |
2406 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2427 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
2407 | extern int ext4_end_io_nolock(ext4_io_end_t *io); | ||
2408 | extern void ext4_io_submit(struct ext4_io_submit *io); | 2428 | extern void ext4_io_submit(struct ext4_io_submit *io); |
2409 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | 2429 | extern int ext4_bio_write_page(struct ext4_io_submit *io, |
2410 | struct page *page, | 2430 | struct page *page, |
@@ -2452,6 +2472,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
2452 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); | 2472 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); |
2453 | } | 2473 | } |
2454 | 2474 | ||
2475 | /* | ||
2476 | * Disable DIO read nolock optimization, so new dioreaders will be forced | ||
2477 | * to grab i_mutex | ||
2478 | */ | ||
2479 | static inline void ext4_inode_block_unlocked_dio(struct inode *inode) | ||
2480 | { | ||
2481 | ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | ||
2482 | smp_mb(); | ||
2483 | } | ||
2484 | static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) | ||
2485 | { | ||
2486 | smp_mb(); | ||
2487 | ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | ||
2488 | } | ||
2489 | |||
2455 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 2490 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
2456 | 2491 | ||
2457 | /* For ioend & aio unwritten conversion wait queues */ | 2492 | /* For ioend & aio unwritten conversion wait queues */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aabbb3f53683..1c94cca35ed1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1177 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1177 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1178 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); | 1178 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
1179 | 1179 | ||
1180 | neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1); | 1180 | le16_add_cpu(&neh->eh_depth, 1); |
1181 | ext4_mark_inode_dirty(handle, inode); | 1181 | ext4_mark_inode_dirty(handle, inode); |
1182 | out: | 1182 | out: |
1183 | brelse(bh); | 1183 | brelse(bh); |
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
1656 | } | 1656 | } |
1657 | 1657 | ||
1658 | /* | 1658 | /* |
1659 | * This function does a very simple check to see if we can collapse | ||
1660 | * an extent tree with a single extent tree leaf block into the inode. | ||
1661 | */ | ||
1662 | static void ext4_ext_try_to_merge_up(handle_t *handle, | ||
1663 | struct inode *inode, | ||
1664 | struct ext4_ext_path *path) | ||
1665 | { | ||
1666 | size_t s; | ||
1667 | unsigned max_root = ext4_ext_space_root(inode, 0); | ||
1668 | ext4_fsblk_t blk; | ||
1669 | |||
1670 | if ((path[0].p_depth != 1) || | ||
1671 | (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) || | ||
1672 | (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root)) | ||
1673 | return; | ||
1674 | |||
1675 | /* | ||
1676 | * We need to modify the block allocation bitmap and the block | ||
1677 | * group descriptor to release the extent tree block. If we | ||
1678 | * can't get the journal credits, give up. | ||
1679 | */ | ||
1680 | if (ext4_journal_extend(handle, 2)) | ||
1681 | return; | ||
1682 | |||
1683 | /* | ||
1684 | * Copy the extent data up to the inode | ||
1685 | */ | ||
1686 | blk = ext4_idx_pblock(path[0].p_idx); | ||
1687 | s = le16_to_cpu(path[1].p_hdr->eh_entries) * | ||
1688 | sizeof(struct ext4_extent_idx); | ||
1689 | s += sizeof(struct ext4_extent_header); | ||
1690 | |||
1691 | memcpy(path[0].p_hdr, path[1].p_hdr, s); | ||
1692 | path[0].p_depth = 0; | ||
1693 | path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + | ||
1694 | (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); | ||
1695 | path[0].p_hdr->eh_max = cpu_to_le16(max_root); | ||
1696 | |||
1697 | brelse(path[1].p_bh); | ||
1698 | ext4_free_blocks(handle, inode, NULL, blk, 1, | ||
1699 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | ||
1700 | } | ||
1701 | |||
1702 | /* | ||
1659 | * This function tries to merge the @ex extent to neighbours in the tree. | 1703 | * This function tries to merge the @ex extent to neighbours in the tree. |
1660 | * return 1 if merge left else 0. | 1704 | * return 1 if merge left else 0. |
1661 | */ | 1705 | */ |
1662 | static int ext4_ext_try_to_merge(struct inode *inode, | 1706 | static void ext4_ext_try_to_merge(handle_t *handle, |
1707 | struct inode *inode, | ||
1663 | struct ext4_ext_path *path, | 1708 | struct ext4_ext_path *path, |
1664 | struct ext4_extent *ex) { | 1709 | struct ext4_extent *ex) { |
1665 | struct ext4_extent_header *eh; | 1710 | struct ext4_extent_header *eh; |
1666 | unsigned int depth; | 1711 | unsigned int depth; |
1667 | int merge_done = 0; | 1712 | int merge_done = 0; |
1668 | int ret = 0; | ||
1669 | 1713 | ||
1670 | depth = ext_depth(inode); | 1714 | depth = ext_depth(inode); |
1671 | BUG_ON(path[depth].p_hdr == NULL); | 1715 | BUG_ON(path[depth].p_hdr == NULL); |
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode, | |||
1675 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); | 1719 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); |
1676 | 1720 | ||
1677 | if (!merge_done) | 1721 | if (!merge_done) |
1678 | ret = ext4_ext_try_to_merge_right(inode, path, ex); | 1722 | (void) ext4_ext_try_to_merge_right(inode, path, ex); |
1679 | 1723 | ||
1680 | return ret; | 1724 | ext4_ext_try_to_merge_up(handle, inode, path); |
1681 | } | 1725 | } |
1682 | 1726 | ||
1683 | /* | 1727 | /* |
@@ -1893,7 +1937,7 @@ has_space: | |||
1893 | merge: | 1937 | merge: |
1894 | /* try to merge extents */ | 1938 | /* try to merge extents */ |
1895 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) | 1939 | if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) |
1896 | ext4_ext_try_to_merge(inode, path, nearex); | 1940 | ext4_ext_try_to_merge(handle, inode, path, nearex); |
1897 | 1941 | ||
1898 | 1942 | ||
1899 | /* time to correct all indexes above */ | 1943 | /* time to correct all indexes above */ |
@@ -1901,7 +1945,7 @@ merge: | |||
1901 | if (err) | 1945 | if (err) |
1902 | goto cleanup; | 1946 | goto cleanup; |
1903 | 1947 | ||
1904 | err = ext4_ext_dirty(handle, inode, path + depth); | 1948 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
1905 | 1949 | ||
1906 | cleanup: | 1950 | cleanup: |
1907 | if (npath) { | 1951 | if (npath) { |
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2092 | } | 2136 | } |
2093 | 2137 | ||
2094 | /* | 2138 | /* |
2095 | * ext4_ext_check_cache() | 2139 | * ext4_ext_in_cache() |
2096 | * Checks to see if the given block is in the cache. | 2140 | * Checks to see if the given block is in the cache. |
2097 | * If it is, the cached extent is stored in the given | 2141 | * If it is, the cached extent is stored in the given |
2098 | * cache extent pointer. If the cached extent is a hole, | 2142 | * cache extent pointer. |
2099 | * this routine should be used instead of | ||
2100 | * ext4_ext_in_cache if the calling function needs to | ||
2101 | * know the size of the hole. | ||
2102 | * | 2143 | * |
2103 | * @inode: The files inode | 2144 | * @inode: The files inode |
2104 | * @block: The block to look for in the cache | 2145 | * @block: The block to look for in the cache |
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2107 | * | 2148 | * |
2108 | * Return 0 if cache is invalid; 1 if the cache is valid | 2149 | * Return 0 if cache is invalid; 1 if the cache is valid |
2109 | */ | 2150 | */ |
2110 | static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, | 2151 | static int |
2111 | struct ext4_ext_cache *ex){ | 2152 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, |
2153 | struct ext4_extent *ex) | ||
2154 | { | ||
2112 | struct ext4_ext_cache *cex; | 2155 | struct ext4_ext_cache *cex; |
2113 | struct ext4_sb_info *sbi; | 2156 | struct ext4_sb_info *sbi; |
2114 | int ret = 0; | 2157 | int ret = 0; |
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, | |||
2125 | goto errout; | 2168 | goto errout; |
2126 | 2169 | ||
2127 | if (in_range(block, cex->ec_block, cex->ec_len)) { | 2170 | if (in_range(block, cex->ec_block, cex->ec_len)) { |
2128 | memcpy(ex, cex, sizeof(struct ext4_ext_cache)); | 2171 | ex->ee_block = cpu_to_le32(cex->ec_block); |
2172 | ext4_ext_store_pblock(ex, cex->ec_start); | ||
2173 | ex->ee_len = cpu_to_le16(cex->ec_len); | ||
2129 | ext_debug("%u cached by %u:%u:%llu\n", | 2174 | ext_debug("%u cached by %u:%u:%llu\n", |
2130 | block, | 2175 | block, |
2131 | cex->ec_block, cex->ec_len, cex->ec_start); | 2176 | cex->ec_block, cex->ec_len, cex->ec_start); |
@@ -2138,37 +2183,6 @@ errout: | |||
2138 | } | 2183 | } |
2139 | 2184 | ||
2140 | /* | 2185 | /* |
2141 | * ext4_ext_in_cache() | ||
2142 | * Checks to see if the given block is in the cache. | ||
2143 | * If it is, the cached extent is stored in the given | ||
2144 | * extent pointer. | ||
2145 | * | ||
2146 | * @inode: The files inode | ||
2147 | * @block: The block to look for in the cache | ||
2148 | * @ex: Pointer where the cached extent will be stored | ||
2149 | * if it contains block | ||
2150 | * | ||
2151 | * Return 0 if cache is invalid; 1 if the cache is valid | ||
2152 | */ | ||
2153 | static int | ||
2154 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | ||
2155 | struct ext4_extent *ex) | ||
2156 | { | ||
2157 | struct ext4_ext_cache cex; | ||
2158 | int ret = 0; | ||
2159 | |||
2160 | if (ext4_ext_check_cache(inode, block, &cex)) { | ||
2161 | ex->ee_block = cpu_to_le32(cex.ec_block); | ||
2162 | ext4_ext_store_pblock(ex, cex.ec_start); | ||
2163 | ex->ee_len = cpu_to_le16(cex.ec_len); | ||
2164 | ret = 1; | ||
2165 | } | ||
2166 | |||
2167 | return ret; | ||
2168 | } | ||
2169 | |||
2170 | |||
2171 | /* | ||
2172 | * ext4_ext_rm_idx: | 2186 | * ext4_ext_rm_idx: |
2173 | * removes index from the index block. | 2187 | * removes index from the index block. |
2174 | */ | 2188 | */ |
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2274 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2288 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2275 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2289 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2276 | ext4_fsblk_t pblk; | 2290 | ext4_fsblk_t pblk; |
2277 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2291 | int flags = 0; |
2278 | 2292 | ||
2279 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 2293 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
2280 | flags |= EXT4_FREE_BLOCKS_METADATA; | 2294 | flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; |
2295 | else if (ext4_should_journal_data(inode)) | ||
2296 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
2297 | |||
2281 | /* | 2298 | /* |
2282 | * For bigalloc file systems, we never free a partial cluster | 2299 | * For bigalloc file systems, we never free a partial cluster |
2283 | * at the beginning of the extent. Instead, we make a note | 2300 | * at the beginning of the extent. Instead, we make a note |
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
2572 | struct ext4_ext_path *path = NULL; | 2589 | struct ext4_ext_path *path = NULL; |
2573 | ext4_fsblk_t partial_cluster = 0; | 2590 | ext4_fsblk_t partial_cluster = 0; |
2574 | handle_t *handle; | 2591 | handle_t *handle; |
2575 | int i = 0, err; | 2592 | int i = 0, err = 0; |
2576 | 2593 | ||
2577 | ext_debug("truncate since %u to %u\n", start, end); | 2594 | ext_debug("truncate since %u to %u\n", start, end); |
2578 | 2595 | ||
@@ -2604,12 +2621,16 @@ again: | |||
2604 | return PTR_ERR(path); | 2621 | return PTR_ERR(path); |
2605 | } | 2622 | } |
2606 | depth = ext_depth(inode); | 2623 | depth = ext_depth(inode); |
2624 | /* Leaf node may not exist only if inode has no blocks at all */ | ||
2607 | ex = path[depth].p_ext; | 2625 | ex = path[depth].p_ext; |
2608 | if (!ex) { | 2626 | if (!ex) { |
2609 | ext4_ext_drop_refs(path); | 2627 | if (depth) { |
2610 | kfree(path); | 2628 | EXT4_ERROR_INODE(inode, |
2611 | path = NULL; | 2629 | "path[%d].p_hdr == NULL", |
2612 | goto cont; | 2630 | depth); |
2631 | err = -EIO; | ||
2632 | } | ||
2633 | goto out; | ||
2613 | } | 2634 | } |
2614 | 2635 | ||
2615 | ee_block = le32_to_cpu(ex->ee_block); | 2636 | ee_block = le32_to_cpu(ex->ee_block); |
@@ -2641,8 +2662,6 @@ again: | |||
2641 | goto out; | 2662 | goto out; |
2642 | } | 2663 | } |
2643 | } | 2664 | } |
2644 | cont: | ||
2645 | |||
2646 | /* | 2665 | /* |
2647 | * We start scanning from right side, freeing all the blocks | 2666 | * We start scanning from right side, freeing all the blocks |
2648 | * after i_size and walking into the tree depth-wise. | 2667 | * after i_size and walking into the tree depth-wise. |
@@ -2924,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2924 | ext4_ext_mark_initialized(ex); | 2943 | ext4_ext_mark_initialized(ex); |
2925 | 2944 | ||
2926 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) | 2945 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) |
2927 | ext4_ext_try_to_merge(inode, path, ex); | 2946 | ext4_ext_try_to_merge(handle, inode, path, ex); |
2928 | 2947 | ||
2929 | err = ext4_ext_dirty(handle, inode, path + depth); | 2948 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
2930 | goto out; | 2949 | goto out; |
2931 | } | 2950 | } |
2932 | 2951 | ||
@@ -2958,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle, | |||
2958 | goto fix_extent_len; | 2977 | goto fix_extent_len; |
2959 | /* update the extent length and mark as initialized */ | 2978 | /* update the extent length and mark as initialized */ |
2960 | ex->ee_len = cpu_to_le16(ee_len); | 2979 | ex->ee_len = cpu_to_le16(ee_len); |
2961 | ext4_ext_try_to_merge(inode, path, ex); | 2980 | ext4_ext_try_to_merge(handle, inode, path, ex); |
2962 | err = ext4_ext_dirty(handle, inode, path + depth); | 2981 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
2963 | goto out; | 2982 | goto out; |
2964 | } else if (err) | 2983 | } else if (err) |
2965 | goto fix_extent_len; | 2984 | goto fix_extent_len; |
@@ -3041,7 +3060,6 @@ out: | |||
3041 | return err ? err : map->m_len; | 3060 | return err ? err : map->m_len; |
3042 | } | 3061 | } |
3043 | 3062 | ||
3044 | #define EXT4_EXT_ZERO_LEN 7 | ||
3045 | /* | 3063 | /* |
3046 | * This function is called by ext4_ext_map_blocks() if someone tries to write | 3064 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
3047 | * to an uninitialized extent. It may result in splitting the uninitialized | 3065 | * to an uninitialized extent. It may result in splitting the uninitialized |
@@ -3067,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3067 | struct ext4_map_blocks *map, | 3085 | struct ext4_map_blocks *map, |
3068 | struct ext4_ext_path *path) | 3086 | struct ext4_ext_path *path) |
3069 | { | 3087 | { |
3088 | struct ext4_sb_info *sbi; | ||
3070 | struct ext4_extent_header *eh; | 3089 | struct ext4_extent_header *eh; |
3071 | struct ext4_map_blocks split_map; | 3090 | struct ext4_map_blocks split_map; |
3072 | struct ext4_extent zero_ex; | 3091 | struct ext4_extent zero_ex; |
3073 | struct ext4_extent *ex; | 3092 | struct ext4_extent *ex; |
3074 | ext4_lblk_t ee_block, eof_block; | 3093 | ext4_lblk_t ee_block, eof_block; |
3075 | unsigned int ee_len, depth; | 3094 | unsigned int ee_len, depth; |
3076 | int allocated; | 3095 | int allocated, max_zeroout = 0; |
3077 | int err = 0; | 3096 | int err = 0; |
3078 | int split_flag = 0; | 3097 | int split_flag = 0; |
3079 | 3098 | ||
@@ -3081,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3081 | "block %llu, max_blocks %u\n", inode->i_ino, | 3100 | "block %llu, max_blocks %u\n", inode->i_ino, |
3082 | (unsigned long long)map->m_lblk, map->m_len); | 3101 | (unsigned long long)map->m_lblk, map->m_len); |
3083 | 3102 | ||
3103 | sbi = EXT4_SB(inode->i_sb); | ||
3084 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3104 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3085 | inode->i_sb->s_blocksize_bits; | 3105 | inode->i_sb->s_blocksize_bits; |
3086 | if (eof_block < map->m_lblk + map->m_len) | 3106 | if (eof_block < map->m_lblk + map->m_len) |
@@ -3180,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3180 | */ | 3200 | */ |
3181 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3201 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
3182 | 3202 | ||
3183 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ | 3203 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) |
3184 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && | 3204 | max_zeroout = sbi->s_extent_max_zeroout_kb >> |
3185 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 3205 | inode->i_sb->s_blocksize_bits; |
3206 | |||
3207 | /* If extent is no larger than s_extent_max_zeroout_kb, zeroout directly */ | ||
3208 | if (max_zeroout && (ee_len <= max_zeroout)) { | ||
3186 | err = ext4_ext_zeroout(inode, ex); | 3209 | err = ext4_ext_zeroout(inode, ex); |
3187 | if (err) | 3210 | if (err) |
3188 | goto out; | 3211 | goto out; |
@@ -3191,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3191 | if (err) | 3214 | if (err) |
3192 | goto out; | 3215 | goto out; |
3193 | ext4_ext_mark_initialized(ex); | 3216 | ext4_ext_mark_initialized(ex); |
3194 | ext4_ext_try_to_merge(inode, path, ex); | 3217 | ext4_ext_try_to_merge(handle, inode, path, ex); |
3195 | err = ext4_ext_dirty(handle, inode, path + depth); | 3218 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
3196 | goto out; | 3219 | goto out; |
3197 | } | 3220 | } |
3198 | 3221 | ||
@@ -3206,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3206 | split_map.m_lblk = map->m_lblk; | 3229 | split_map.m_lblk = map->m_lblk; |
3207 | split_map.m_len = map->m_len; | 3230 | split_map.m_len = map->m_len; |
3208 | 3231 | ||
3209 | if (allocated > map->m_len) { | 3232 | if (max_zeroout && (allocated > map->m_len)) { |
3210 | if (allocated <= EXT4_EXT_ZERO_LEN && | 3233 | if (allocated <= max_zeroout) { |
3211 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3212 | /* case 3 */ | 3234 | /* case 3 */ |
3213 | zero_ex.ee_block = | 3235 | zero_ex.ee_block = |
3214 | cpu_to_le32(map->m_lblk); | 3236 | cpu_to_le32(map->m_lblk); |
@@ -3220,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3220 | goto out; | 3242 | goto out; |
3221 | split_map.m_lblk = map->m_lblk; | 3243 | split_map.m_lblk = map->m_lblk; |
3222 | split_map.m_len = allocated; | 3244 | split_map.m_len = allocated; |
3223 | } else if ((map->m_lblk - ee_block + map->m_len < | 3245 | } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { |
3224 | EXT4_EXT_ZERO_LEN) && | ||
3225 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3226 | /* case 2 */ | 3246 | /* case 2 */ |
3227 | if (map->m_lblk != ee_block) { | 3247 | if (map->m_lblk != ee_block) { |
3228 | zero_ex.ee_block = ex->ee_block; | 3248 | zero_ex.ee_block = ex->ee_block; |
@@ -3242,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
3242 | } | 3262 | } |
3243 | 3263 | ||
3244 | allocated = ext4_split_extent(handle, inode, path, | 3264 | allocated = ext4_split_extent(handle, inode, path, |
3245 | &split_map, split_flag, 0); | 3265 | &split_map, split_flag, 0); |
3246 | if (allocated < 0) | 3266 | if (allocated < 0) |
3247 | err = allocated; | 3267 | err = allocated; |
3248 | 3268 | ||
@@ -3256,7 +3276,7 @@ out: | |||
3256 | * to an uninitialized extent. | 3276 | * to an uninitialized extent. |
3257 | * | 3277 | * |
3258 | * Writing to an uninitialized extent may result in splitting the uninitialized | 3278 | * Writing to an uninitialized extent may result in splitting the uninitialized |
3259 | * extent into multiple /initialized uninitialized extents (up to three) | 3279 | * extent into multiple initialized/uninitialized extents (up to three) |
3260 | * There are three possibilities: | 3280 | * There are three possibilities: |
3261 | * a> There is no split required: Entire extent should be uninitialized | 3281 | * a> There is no split required: Entire extent should be uninitialized |
3262 | * b> Splits in two extents: Write is happening at either end of the extent | 3282 | * b> Splits in two extents: Write is happening at either end of the extent |
@@ -3333,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3333 | /* note: ext4_ext_correct_indexes() isn't needed here because | 3353 | /* note: ext4_ext_correct_indexes() isn't needed here because |
3334 | * borders are not changed | 3354 | * borders are not changed |
3335 | */ | 3355 | */ |
3336 | ext4_ext_try_to_merge(inode, path, ex); | 3356 | ext4_ext_try_to_merge(handle, inode, path, ex); |
3337 | 3357 | ||
3338 | /* Mark modified extent as dirty */ | 3358 | /* Mark modified extent as dirty */ |
3339 | err = ext4_ext_dirty(handle, inode, path + depth); | 3359 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
3340 | out: | 3360 | out: |
3341 | ext4_ext_show_leaf(inode, path); | 3361 | ext4_ext_show_leaf(inode, path); |
3342 | return err; | 3362 | return err; |
@@ -3600,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3600 | { | 3620 | { |
3601 | int ret = 0; | 3621 | int ret = 0; |
3602 | int err = 0; | 3622 | int err = 0; |
3603 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3623 | ext4_io_end_t *io = ext4_inode_aio(inode); |
3604 | 3624 | ||
3605 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " | 3625 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " |
3606 | "block %llu, max_blocks %u, flags %x, allocated %u\n", | 3626 | "block %llu, max_blocks %u, flags %x, allocated %u\n", |
@@ -3615,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3615 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3635 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3616 | ret = ext4_split_unwritten_extents(handle, inode, map, | 3636 | ret = ext4_split_unwritten_extents(handle, inode, map, |
3617 | path, flags); | 3637 | path, flags); |
3638 | if (ret <= 0) | ||
3639 | goto out; | ||
3618 | /* | 3640 | /* |
3619 | * Flag the inode(non aio case) or end_io struct (aio case) | 3641 | * Flag the inode(non aio case) or end_io struct (aio case) |
3620 | * that this IO needs to conversion to written when IO is | 3642 | * that this IO needs to conversion to written when IO is |
@@ -3858,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3858 | unsigned int allocated = 0, offset = 0; | 3880 | unsigned int allocated = 0, offset = 0; |
3859 | unsigned int allocated_clusters = 0; | 3881 | unsigned int allocated_clusters = 0; |
3860 | struct ext4_allocation_request ar; | 3882 | struct ext4_allocation_request ar; |
3861 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3883 | ext4_io_end_t *io = ext4_inode_aio(inode); |
3862 | ext4_lblk_t cluster_offset; | 3884 | ext4_lblk_t cluster_offset; |
3885 | int set_unwritten = 0; | ||
3863 | 3886 | ||
3864 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3887 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3865 | map->m_lblk, map->m_len, inode->i_ino); | 3888 | map->m_lblk, map->m_len, inode->i_ino); |
@@ -4082,13 +4105,8 @@ got_allocated_blocks: | |||
4082 | * For non asycn direct IO case, flag the inode state | 4105 | * For non asycn direct IO case, flag the inode state |
4083 | * that we need to perform conversion when IO is done. | 4106 | * that we need to perform conversion when IO is done. |
4084 | */ | 4107 | */ |
4085 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4108 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) |
4086 | if (io) | 4109 | set_unwritten = 1; |
4087 | ext4_set_io_unwritten_flag(inode, io); | ||
4088 | else | ||
4089 | ext4_set_inode_state(inode, | ||
4090 | EXT4_STATE_DIO_UNWRITTEN); | ||
4091 | } | ||
4092 | if (ext4_should_dioread_nolock(inode)) | 4110 | if (ext4_should_dioread_nolock(inode)) |
4093 | map->m_flags |= EXT4_MAP_UNINIT; | 4111 | map->m_flags |= EXT4_MAP_UNINIT; |
4094 | } | 4112 | } |
@@ -4100,6 +4118,15 @@ got_allocated_blocks: | |||
4100 | if (!err) | 4118 | if (!err) |
4101 | err = ext4_ext_insert_extent(handle, inode, path, | 4119 | err = ext4_ext_insert_extent(handle, inode, path, |
4102 | &newex, flags); | 4120 | &newex, flags); |
4121 | |||
4122 | if (!err && set_unwritten) { | ||
4123 | if (io) | ||
4124 | ext4_set_io_unwritten_flag(inode, io); | ||
4125 | else | ||
4126 | ext4_set_inode_state(inode, | ||
4127 | EXT4_STATE_DIO_UNWRITTEN); | ||
4128 | } | ||
4129 | |||
4103 | if (err && free_on_err) { | 4130 | if (err && free_on_err) { |
4104 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? | 4131 | int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? |
4105 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; | 4132 | EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; |
@@ -4241,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
4241 | * finish any pending end_io work so we won't run the risk of | 4268 | * finish any pending end_io work so we won't run the risk of |
4242 | * converting any truncated blocks to initialized later | 4269 | * converting any truncated blocks to initialized later |
4243 | */ | 4270 | */ |
4244 | ext4_flush_completed_IO(inode); | 4271 | ext4_flush_unwritten_io(inode); |
4245 | 4272 | ||
4246 | /* | 4273 | /* |
4247 | * probably first extent we're gonna free will be last in block | 4274 | * probably first extent we're gonna free will be last in block |
@@ -4769,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4769 | loff_t first_page_offset, last_page_offset; | 4796 | loff_t first_page_offset, last_page_offset; |
4770 | int credits, err = 0; | 4797 | int credits, err = 0; |
4771 | 4798 | ||
4799 | /* | ||
4800 | * Write out all dirty pages to avoid race conditions | ||
4801 | * Then release them. | ||
4802 | */ | ||
4803 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4804 | err = filemap_write_and_wait_range(mapping, | ||
4805 | offset, offset + length - 1); | ||
4806 | |||
4807 | if (err) | ||
4808 | return err; | ||
4809 | } | ||
4810 | |||
4811 | mutex_lock(&inode->i_mutex); | ||
4812 | /* It's not possible punch hole on append only file */ | ||
4813 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
4814 | err = -EPERM; | ||
4815 | goto out_mutex; | ||
4816 | } | ||
4817 | if (IS_SWAPFILE(inode)) { | ||
4818 | err = -ETXTBSY; | ||
4819 | goto out_mutex; | ||
4820 | } | ||
4821 | |||
4772 | /* No need to punch hole beyond i_size */ | 4822 | /* No need to punch hole beyond i_size */ |
4773 | if (offset >= inode->i_size) | 4823 | if (offset >= inode->i_size) |
4774 | return 0; | 4824 | goto out_mutex; |
4775 | 4825 | ||
4776 | /* | 4826 | /* |
4777 | * If the hole extends beyond i_size, set the hole | 4827 | * If the hole extends beyond i_size, set the hole |
@@ -4789,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4789 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 4839 | first_page_offset = first_page << PAGE_CACHE_SHIFT; |
4790 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 4840 | last_page_offset = last_page << PAGE_CACHE_SHIFT; |
4791 | 4841 | ||
4792 | /* | ||
4793 | * Write out all dirty pages to avoid race conditions | ||
4794 | * Then release them. | ||
4795 | */ | ||
4796 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4797 | err = filemap_write_and_wait_range(mapping, | ||
4798 | offset, offset + length - 1); | ||
4799 | |||
4800 | if (err) | ||
4801 | return err; | ||
4802 | } | ||
4803 | |||
4804 | /* Now release the pages */ | 4842 | /* Now release the pages */ |
4805 | if (last_page_offset > first_page_offset) { | 4843 | if (last_page_offset > first_page_offset) { |
4806 | truncate_pagecache_range(inode, first_page_offset, | 4844 | truncate_pagecache_range(inode, first_page_offset, |
4807 | last_page_offset - 1); | 4845 | last_page_offset - 1); |
4808 | } | 4846 | } |
4809 | 4847 | ||
4810 | /* finish any pending end_io work */ | 4848 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
4811 | ext4_flush_completed_IO(inode); | 4849 | ext4_inode_block_unlocked_dio(inode); |
4850 | err = ext4_flush_unwritten_io(inode); | ||
4851 | if (err) | ||
4852 | goto out_dio; | ||
4853 | inode_dio_wait(inode); | ||
4812 | 4854 | ||
4813 | credits = ext4_writepage_trans_blocks(inode); | 4855 | credits = ext4_writepage_trans_blocks(inode); |
4814 | handle = ext4_journal_start(inode, credits); | 4856 | handle = ext4_journal_start(inode, credits); |
4815 | if (IS_ERR(handle)) | 4857 | if (IS_ERR(handle)) { |
4816 | return PTR_ERR(handle); | 4858 | err = PTR_ERR(handle); |
4859 | goto out_dio; | ||
4860 | } | ||
4817 | 4861 | ||
4818 | err = ext4_orphan_add(handle, inode); | ||
4819 | if (err) | ||
4820 | goto out; | ||
4821 | 4862 | ||
4822 | /* | 4863 | /* |
4823 | * Now we need to zero out the non-page-aligned data in the | 4864 | * Now we need to zero out the non-page-aligned data in the |
@@ -4903,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4903 | up_write(&EXT4_I(inode)->i_data_sem); | 4944 | up_write(&EXT4_I(inode)->i_data_sem); |
4904 | 4945 | ||
4905 | out: | 4946 | out: |
4906 | ext4_orphan_del(handle, inode); | ||
4907 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4947 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4908 | ext4_mark_inode_dirty(handle, inode); | 4948 | ext4_mark_inode_dirty(handle, inode); |
4909 | ext4_journal_stop(handle); | 4949 | ext4_journal_stop(handle); |
4950 | out_dio: | ||
4951 | ext4_inode_resume_unlocked_dio(inode); | ||
4952 | out_mutex: | ||
4953 | mutex_unlock(&inode->i_mutex); | ||
4910 | return err; | 4954 | return err; |
4911 | } | 4955 | } |
4912 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4956 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3b0e3bdaabfc..ca6f07afe601 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp) | |||
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
57 | 57 | ||
58 | static void ext4_aiodio_wait(struct inode *inode) | 58 | void ext4_unwritten_wait(struct inode *inode) |
59 | { | 59 | { |
60 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 60 | wait_queue_head_t *wq = ext4_ioend_wq(inode); |
61 | 61 | ||
62 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); | 62 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0)); |
63 | } | 63 | } |
64 | 64 | ||
65 | /* | 65 | /* |
@@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, | |||
116 | "performance will be poor.", | 116 | "performance will be poor.", |
117 | inode->i_ino, current->comm); | 117 | inode->i_ino, current->comm); |
118 | mutex_lock(ext4_aio_mutex(inode)); | 118 | mutex_lock(ext4_aio_mutex(inode)); |
119 | ext4_aiodio_wait(inode); | 119 | ext4_unwritten_wait(inode); |
120 | } | 120 | } |
121 | 121 | ||
122 | BUG_ON(iocb->ki_pos != pos); | 122 | BUG_ON(iocb->ki_pos != pos); |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2a1dcea4f12e..be1d89f385b4 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -34,87 +34,6 @@ | |||
34 | 34 | ||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | static void dump_completed_IO(struct inode * inode) | ||
38 | { | ||
39 | #ifdef EXT4FS_DEBUG | ||
40 | struct list_head *cur, *before, *after; | ||
41 | ext4_io_end_t *io, *io0, *io1; | ||
42 | unsigned long flags; | ||
43 | |||
44 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
45 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
50 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
51 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
52 | cur = &io->list; | ||
53 | before = cur->prev; | ||
54 | io0 = container_of(before, ext4_io_end_t, list); | ||
55 | after = cur->next; | ||
56 | io1 = container_of(after, ext4_io_end_t, list); | ||
57 | |||
58 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
59 | io, inode->i_ino, io0, io1); | ||
60 | } | ||
61 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This function is called from ext4_sync_file(). | ||
67 | * | ||
68 | * When IO is completed, the work to convert unwritten extents to | ||
69 | * written is queued on workqueue but may not get immediately | ||
70 | * scheduled. When fsync is called, we need to ensure the | ||
71 | * conversion is complete before fsync returns. | ||
72 | * The inode keeps track of a list of pending/completed IO that | ||
73 | * might needs to do the conversion. This function walks through | ||
74 | * the list and convert the related unwritten extents for completed IO | ||
75 | * to written. | ||
76 | * The function return the number of pending IOs on success. | ||
77 | */ | ||
78 | int ext4_flush_completed_IO(struct inode *inode) | ||
79 | { | ||
80 | ext4_io_end_t *io; | ||
81 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
82 | unsigned long flags; | ||
83 | int ret = 0; | ||
84 | int ret2 = 0; | ||
85 | |||
86 | dump_completed_IO(inode); | ||
87 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
88 | while (!list_empty(&ei->i_completed_io_list)){ | ||
89 | io = list_entry(ei->i_completed_io_list.next, | ||
90 | ext4_io_end_t, list); | ||
91 | list_del_init(&io->list); | ||
92 | io->flag |= EXT4_IO_END_IN_FSYNC; | ||
93 | /* | ||
94 | * Calling ext4_end_io_nolock() to convert completed | ||
95 | * IO to written. | ||
96 | * | ||
97 | * When ext4_sync_file() is called, run_queue() may already | ||
98 | * about to flush the work corresponding to this io structure. | ||
99 | * It will be upset if it founds the io structure related | ||
100 | * to the work-to-be schedule is freed. | ||
101 | * | ||
102 | * Thus we need to keep the io structure still valid here after | ||
103 | * conversion finished. The io structure has a flag to | ||
104 | * avoid double converting from both fsync and background work | ||
105 | * queue work. | ||
106 | */ | ||
107 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
108 | ret = ext4_end_io_nolock(io); | ||
109 | if (ret < 0) | ||
110 | ret2 = ret; | ||
111 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
112 | io->flag &= ~EXT4_IO_END_IN_FSYNC; | ||
113 | } | ||
114 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
115 | return (ret2 < 0) ? ret2 : 0; | ||
116 | } | ||
117 | |||
118 | /* | 37 | /* |
119 | * If we're not journaling and this is a just-created file, we have to | 38 | * If we're not journaling and this is a just-created file, we have to |
120 | * sync our parent directory (if it was freshly created) since | 39 | * sync our parent directory (if it was freshly created) since |
@@ -203,7 +122,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
203 | struct inode *inode = file->f_mapping->host; | 122 | struct inode *inode = file->f_mapping->host; |
204 | struct ext4_inode_info *ei = EXT4_I(inode); | 123 | struct ext4_inode_info *ei = EXT4_I(inode); |
205 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 124 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
206 | int ret; | 125 | int ret, err; |
207 | tid_t commit_tid; | 126 | tid_t commit_tid; |
208 | bool needs_barrier = false; | 127 | bool needs_barrier = false; |
209 | 128 | ||
@@ -219,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
219 | if (inode->i_sb->s_flags & MS_RDONLY) | 138 | if (inode->i_sb->s_flags & MS_RDONLY) |
220 | goto out; | 139 | goto out; |
221 | 140 | ||
222 | ret = ext4_flush_completed_IO(inode); | 141 | ret = ext4_flush_unwritten_io(inode); |
223 | if (ret < 0) | 142 | if (ret < 0) |
224 | goto out; | 143 | goto out; |
225 | 144 | ||
@@ -255,8 +174,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
255 | needs_barrier = true; | 174 | needs_barrier = true; |
256 | jbd2_log_start_commit(journal, commit_tid); | 175 | jbd2_log_start_commit(journal, commit_tid); |
257 | ret = jbd2_log_wait_commit(journal, commit_tid); | 176 | ret = jbd2_log_wait_commit(journal, commit_tid); |
258 | if (needs_barrier) | 177 | if (needs_barrier) { |
259 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 178 | err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
179 | if (!ret) | ||
180 | ret = err; | ||
181 | } | ||
260 | out: | 182 | out: |
261 | mutex_unlock(&inode->i_mutex); | 183 | mutex_unlock(&inode->i_mutex); |
262 | trace_ext4_sync_file_exit(inode, ret); | 184 | trace_ext4_sync_file_exit(inode, ret); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 26154b81b836..fa36372f3fdf 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -697,6 +697,15 @@ got_group: | |||
697 | if (!gdp) | 697 | if (!gdp) |
698 | goto fail; | 698 | goto fail; |
699 | 699 | ||
700 | /* | ||
701 | * Check free inodes count before loading bitmap. | ||
702 | */ | ||
703 | if (ext4_free_inodes_count(sb, gdp) == 0) { | ||
704 | if (++group == ngroups) | ||
705 | group = 0; | ||
706 | continue; | ||
707 | } | ||
708 | |||
700 | brelse(inode_bitmap_bh); | 709 | brelse(inode_bitmap_bh); |
701 | inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); | 710 | inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); |
702 | if (!inode_bitmap_bh) | 711 | if (!inode_bitmap_bh) |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 830e1b2bf145..792e388e7b44 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -807,16 +807,30 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
807 | 807 | ||
808 | retry: | 808 | retry: |
809 | if (rw == READ && ext4_should_dioread_nolock(inode)) { | 809 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
810 | if (unlikely(!list_empty(&ei->i_completed_io_list))) { | 810 | if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { |
811 | mutex_lock(&inode->i_mutex); | 811 | mutex_lock(&inode->i_mutex); |
812 | ext4_flush_completed_IO(inode); | 812 | ext4_flush_unwritten_io(inode); |
813 | mutex_unlock(&inode->i_mutex); | 813 | mutex_unlock(&inode->i_mutex); |
814 | } | 814 | } |
815 | /* | ||
816 | * Nolock dioread optimization may be dynamically disabled | ||
817 | * via ext4_inode_block_unlocked_dio(). Check inode's state | ||
818 | * while holding extra i_dio_count ref. | ||
819 | */ | ||
820 | atomic_inc(&inode->i_dio_count); | ||
821 | smp_mb(); | ||
822 | if (unlikely(ext4_test_inode_state(inode, | ||
823 | EXT4_STATE_DIOREAD_LOCK))) { | ||
824 | inode_dio_done(inode); | ||
825 | goto locked; | ||
826 | } | ||
815 | ret = __blockdev_direct_IO(rw, iocb, inode, | 827 | ret = __blockdev_direct_IO(rw, iocb, inode, |
816 | inode->i_sb->s_bdev, iov, | 828 | inode->i_sb->s_bdev, iov, |
817 | offset, nr_segs, | 829 | offset, nr_segs, |
818 | ext4_get_block, NULL, NULL, 0); | 830 | ext4_get_block, NULL, NULL, 0); |
831 | inode_dio_done(inode); | ||
819 | } else { | 832 | } else { |
833 | locked: | ||
820 | ret = blockdev_direct_IO(rw, iocb, inode, iov, | 834 | ret = blockdev_direct_IO(rw, iocb, inode, iov, |
821 | offset, nr_segs, ext4_get_block); | 835 | offset, nr_segs, ext4_get_block); |
822 | 836 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c862ee5fe79d..b3c243b9afa5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -732,11 +732,13 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
732 | err = ext4_map_blocks(handle, inode, &map, | 732 | err = ext4_map_blocks(handle, inode, &map, |
733 | create ? EXT4_GET_BLOCKS_CREATE : 0); | 733 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
734 | 734 | ||
735 | /* ensure we send some value back into *errp */ | ||
736 | *errp = 0; | ||
737 | |||
735 | if (err < 0) | 738 | if (err < 0) |
736 | *errp = err; | 739 | *errp = err; |
737 | if (err <= 0) | 740 | if (err <= 0) |
738 | return NULL; | 741 | return NULL; |
739 | *errp = 0; | ||
740 | 742 | ||
741 | bh = sb_getblk(inode->i_sb, map.m_pblk); | 743 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
742 | if (!bh) { | 744 | if (!bh) { |
@@ -1954,9 +1956,6 @@ out: | |||
1954 | return ret; | 1956 | return ret; |
1955 | } | 1957 | } |
1956 | 1958 | ||
1957 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
1958 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
1959 | |||
1960 | /* | 1959 | /* |
1961 | * Note that we don't need to start a transaction unless we're journaling data | 1960 | * Note that we don't need to start a transaction unless we're journaling data |
1962 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 1961 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
@@ -2463,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2463 | free_blocks = EXT4_C2B(sbi, | 2462 | free_blocks = EXT4_C2B(sbi, |
2464 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); | 2463 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
2465 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | 2464 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); |
2465 | /* | ||
2466 | * Start pushing delalloc when 1/2 of free blocks are dirty. | ||
2467 | */ | ||
2468 | if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && | ||
2469 | !writeback_in_progress(sb->s_bdi) && | ||
2470 | down_read_trylock(&sb->s_umount)) { | ||
2471 | writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); | ||
2472 | up_read(&sb->s_umount); | ||
2473 | } | ||
2474 | |||
2466 | if (2 * free_blocks < 3 * dirty_blocks || | 2475 | if (2 * free_blocks < 3 * dirty_blocks || |
2467 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { | 2476 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { |
2468 | /* | 2477 | /* |
@@ -2471,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2471 | */ | 2480 | */ |
2472 | return 1; | 2481 | return 1; |
2473 | } | 2482 | } |
2474 | /* | ||
2475 | * Even if we don't switch but are nearing capacity, | ||
2476 | * start pushing delalloc when 1/2 of free blocks are dirty. | ||
2477 | */ | ||
2478 | if (free_blocks < 2 * dirty_blocks) | ||
2479 | writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); | ||
2480 | |||
2481 | return 0; | 2483 | return 0; |
2482 | } | 2484 | } |
2483 | 2485 | ||
@@ -2879,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
2879 | { | 2881 | { |
2880 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 2882 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
2881 | ext4_io_end_t *io_end = iocb->private; | 2883 | ext4_io_end_t *io_end = iocb->private; |
2882 | struct workqueue_struct *wq; | ||
2883 | unsigned long flags; | ||
2884 | struct ext4_inode_info *ei; | ||
2885 | 2884 | ||
2886 | /* if not async direct IO or dio with 0 bytes write, just return */ | 2885 | /* if not async direct IO or dio with 0 bytes write, just return */ |
2887 | if (!io_end || !size) | 2886 | if (!io_end || !size) |
@@ -2910,24 +2909,14 @@ out: | |||
2910 | io_end->iocb = iocb; | 2909 | io_end->iocb = iocb; |
2911 | io_end->result = ret; | 2910 | io_end->result = ret; |
2912 | } | 2911 | } |
2913 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
2914 | 2912 | ||
2915 | /* Add the io_end to per-inode completed aio dio list*/ | 2913 | ext4_add_complete_io(io_end); |
2916 | ei = EXT4_I(io_end->inode); | ||
2917 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
2918 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
2919 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
2920 | |||
2921 | /* queue the work to convert unwritten extents to written */ | ||
2922 | queue_work(wq, &io_end->work); | ||
2923 | } | 2914 | } |
2924 | 2915 | ||
2925 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | 2916 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) |
2926 | { | 2917 | { |
2927 | ext4_io_end_t *io_end = bh->b_private; | 2918 | ext4_io_end_t *io_end = bh->b_private; |
2928 | struct workqueue_struct *wq; | ||
2929 | struct inode *inode; | 2919 | struct inode *inode; |
2930 | unsigned long flags; | ||
2931 | 2920 | ||
2932 | if (!test_clear_buffer_uninit(bh) || !io_end) | 2921 | if (!test_clear_buffer_uninit(bh) || !io_end) |
2933 | goto out; | 2922 | goto out; |
@@ -2946,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2946 | */ | 2935 | */ |
2947 | inode = io_end->inode; | 2936 | inode = io_end->inode; |
2948 | ext4_set_io_unwritten_flag(inode, io_end); | 2937 | ext4_set_io_unwritten_flag(inode, io_end); |
2949 | 2938 | ext4_add_complete_io(io_end); | |
2950 | /* Add the io_end to per-inode completed io list*/ | ||
2951 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
2952 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
2953 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
2954 | |||
2955 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
2956 | /* queue the work to convert unwritten extents to written */ | ||
2957 | queue_work(wq, &io_end->work); | ||
2958 | out: | 2939 | out: |
2959 | bh->b_private = NULL; | 2940 | bh->b_private = NULL; |
2960 | bh->b_end_io = NULL; | 2941 | bh->b_end_io = NULL; |
@@ -3029,6 +3010,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3029 | overwrite = *((int *)iocb->private); | 3010 | overwrite = *((int *)iocb->private); |
3030 | 3011 | ||
3031 | if (overwrite) { | 3012 | if (overwrite) { |
3013 | atomic_inc(&inode->i_dio_count); | ||
3032 | down_read(&EXT4_I(inode)->i_data_sem); | 3014 | down_read(&EXT4_I(inode)->i_data_sem); |
3033 | mutex_unlock(&inode->i_mutex); | 3015 | mutex_unlock(&inode->i_mutex); |
3034 | } | 3016 | } |
@@ -3054,7 +3036,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3054 | * hook to the iocb. | 3036 | * hook to the iocb. |
3055 | */ | 3037 | */ |
3056 | iocb->private = NULL; | 3038 | iocb->private = NULL; |
3057 | EXT4_I(inode)->cur_aio_dio = NULL; | 3039 | ext4_inode_aio_set(inode, NULL); |
3058 | if (!is_sync_kiocb(iocb)) { | 3040 | if (!is_sync_kiocb(iocb)) { |
3059 | ext4_io_end_t *io_end = | 3041 | ext4_io_end_t *io_end = |
3060 | ext4_init_io_end(inode, GFP_NOFS); | 3042 | ext4_init_io_end(inode, GFP_NOFS); |
@@ -3071,7 +3053,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3071 | * is a unwritten extents needs to be converted | 3053 | * is a unwritten extents needs to be converted |
3072 | * when IO is completed. | 3054 | * when IO is completed. |
3073 | */ | 3055 | */ |
3074 | EXT4_I(inode)->cur_aio_dio = iocb->private; | 3056 | ext4_inode_aio_set(inode, io_end); |
3075 | } | 3057 | } |
3076 | 3058 | ||
3077 | if (overwrite) | 3059 | if (overwrite) |
@@ -3091,7 +3073,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3091 | NULL, | 3073 | NULL, |
3092 | DIO_LOCKING); | 3074 | DIO_LOCKING); |
3093 | if (iocb->private) | 3075 | if (iocb->private) |
3094 | EXT4_I(inode)->cur_aio_dio = NULL; | 3076 | ext4_inode_aio_set(inode, NULL); |
3095 | /* | 3077 | /* |
3096 | * The io_end structure takes a reference to the inode, | 3078 | * The io_end structure takes a reference to the inode, |
3097 | * that structure needs to be destroyed and the | 3079 | * that structure needs to be destroyed and the |
@@ -3126,6 +3108,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3126 | retake_lock: | 3108 | retake_lock: |
3127 | /* take i_mutex locking again if we do a ovewrite dio */ | 3109 | /* take i_mutex locking again if we do a ovewrite dio */ |
3128 | if (overwrite) { | 3110 | if (overwrite) { |
3111 | inode_dio_done(inode); | ||
3129 | up_read(&EXT4_I(inode)->i_data_sem); | 3112 | up_read(&EXT4_I(inode)->i_data_sem); |
3130 | mutex_lock(&inode->i_mutex); | 3113 | mutex_lock(&inode->i_mutex); |
3131 | } | 3114 | } |
@@ -4052,6 +4035,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4052 | struct ext4_inode_info *ei = EXT4_I(inode); | 4035 | struct ext4_inode_info *ei = EXT4_I(inode); |
4053 | struct buffer_head *bh = iloc->bh; | 4036 | struct buffer_head *bh = iloc->bh; |
4054 | int err = 0, rc, block; | 4037 | int err = 0, rc, block; |
4038 | int need_datasync = 0; | ||
4055 | uid_t i_uid; | 4039 | uid_t i_uid; |
4056 | gid_t i_gid; | 4040 | gid_t i_gid; |
4057 | 4041 | ||
@@ -4102,7 +4086,10 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4102 | raw_inode->i_file_acl_high = | 4086 | raw_inode->i_file_acl_high = |
4103 | cpu_to_le16(ei->i_file_acl >> 32); | 4087 | cpu_to_le16(ei->i_file_acl >> 32); |
4104 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | 4088 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); |
4105 | ext4_isize_set(raw_inode, ei->i_disksize); | 4089 | if (ei->i_disksize != ext4_isize(raw_inode)) { |
4090 | ext4_isize_set(raw_inode, ei->i_disksize); | ||
4091 | need_datasync = 1; | ||
4092 | } | ||
4106 | if (ei->i_disksize > 0x7fffffffULL) { | 4093 | if (ei->i_disksize > 0x7fffffffULL) { |
4107 | struct super_block *sb = inode->i_sb; | 4094 | struct super_block *sb = inode->i_sb; |
4108 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 4095 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, |
@@ -4155,7 +4142,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4155 | err = rc; | 4142 | err = rc; |
4156 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); | 4143 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); |
4157 | 4144 | ||
4158 | ext4_update_inode_fsync_trans(handle, inode, 0); | 4145 | ext4_update_inode_fsync_trans(handle, inode, need_datasync); |
4159 | out_brelse: | 4146 | out_brelse: |
4160 | brelse(bh); | 4147 | brelse(bh); |
4161 | ext4_std_error(inode->i_sb, err); | 4148 | ext4_std_error(inode->i_sb, err); |
@@ -4298,7 +4285,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4298 | } | 4285 | } |
4299 | 4286 | ||
4300 | if (attr->ia_valid & ATTR_SIZE) { | 4287 | if (attr->ia_valid & ATTR_SIZE) { |
4301 | inode_dio_wait(inode); | ||
4302 | 4288 | ||
4303 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 4289 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
4304 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4290 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -4347,8 +4333,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4347 | } | 4333 | } |
4348 | 4334 | ||
4349 | if (attr->ia_valid & ATTR_SIZE) { | 4335 | if (attr->ia_valid & ATTR_SIZE) { |
4350 | if (attr->ia_size != i_size_read(inode)) | 4336 | if (attr->ia_size != i_size_read(inode)) { |
4351 | truncate_setsize(inode, attr->ia_size); | 4337 | truncate_setsize(inode, attr->ia_size); |
4338 | /* Inode size will be reduced, wait for dio in flight. | ||
4339 | * Temporarily disable dioread_nolock to prevent | ||
4340 | * livelock. */ | ||
4341 | if (orphan) { | ||
4342 | ext4_inode_block_unlocked_dio(inode); | ||
4343 | inode_dio_wait(inode); | ||
4344 | ext4_inode_resume_unlocked_dio(inode); | ||
4345 | } | ||
4346 | } | ||
4352 | ext4_truncate(inode); | 4347 | ext4_truncate(inode); |
4353 | } | 4348 | } |
4354 | 4349 | ||
@@ -4727,6 +4722,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4727 | return err; | 4722 | return err; |
4728 | } | 4723 | } |
4729 | 4724 | ||
4725 | /* Wait for all existing dio workers */ | ||
4726 | ext4_inode_block_unlocked_dio(inode); | ||
4727 | inode_dio_wait(inode); | ||
4728 | |||
4730 | jbd2_journal_lock_updates(journal); | 4729 | jbd2_journal_lock_updates(journal); |
4731 | 4730 | ||
4732 | /* | 4731 | /* |
@@ -4746,6 +4745,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4746 | ext4_set_aops(inode); | 4745 | ext4_set_aops(inode); |
4747 | 4746 | ||
4748 | jbd2_journal_unlock_updates(journal); | 4747 | jbd2_journal_unlock_updates(journal); |
4748 | ext4_inode_resume_unlocked_dio(inode); | ||
4749 | 4749 | ||
4750 | /* Finally we can mark the inode as dirty. */ | 4750 | /* Finally we can mark the inode as dirty. */ |
4751 | 4751 | ||
@@ -4780,6 +4780,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
4780 | int retries = 0; | 4780 | int retries = 0; |
4781 | 4781 | ||
4782 | sb_start_pagefault(inode->i_sb); | 4782 | sb_start_pagefault(inode->i_sb); |
4783 | file_update_time(vma->vm_file); | ||
4783 | /* Delalloc case is easy... */ | 4784 | /* Delalloc case is easy... */ |
4784 | if (test_opt(inode->i_sb, DELALLOC) && | 4785 | if (test_opt(inode->i_sb, DELALLOC) && |
4785 | !ext4_should_journal_data(inode) && | 4786 | !ext4_should_journal_data(inode) && |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5439d6a56e99..5747f52f7c72 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -366,26 +366,11 @@ group_add_out: | |||
366 | return -EOPNOTSUPP; | 366 | return -EOPNOTSUPP; |
367 | } | 367 | } |
368 | 368 | ||
369 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
370 | EXT4_FEATURE_INCOMPAT_META_BG)) { | ||
371 | ext4_msg(sb, KERN_ERR, | ||
372 | "Online resizing not (yet) supported with meta_bg"); | ||
373 | return -EOPNOTSUPP; | ||
374 | } | ||
375 | |||
376 | if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, | 369 | if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, |
377 | sizeof(__u64))) { | 370 | sizeof(__u64))) { |
378 | return -EFAULT; | 371 | return -EFAULT; |
379 | } | 372 | } |
380 | 373 | ||
381 | if (n_blocks_count > MAX_32_NUM && | ||
382 | !EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
383 | EXT4_FEATURE_INCOMPAT_64BIT)) { | ||
384 | ext4_msg(sb, KERN_ERR, | ||
385 | "File system only supports 32-bit block numbers"); | ||
386 | return -EOPNOTSUPP; | ||
387 | } | ||
388 | |||
389 | err = ext4_resize_begin(sb); | 374 | err = ext4_resize_begin(sb); |
390 | if (err) | 375 | if (err) |
391 | return err; | 376 | return err; |
@@ -420,13 +405,6 @@ resizefs_out: | |||
420 | if (!blk_queue_discard(q)) | 405 | if (!blk_queue_discard(q)) |
421 | return -EOPNOTSUPP; | 406 | return -EOPNOTSUPP; |
422 | 407 | ||
423 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
424 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
425 | ext4_msg(sb, KERN_ERR, | ||
426 | "FITRIM not supported with bigalloc"); | ||
427 | return -EOPNOTSUPP; | ||
428 | } | ||
429 | |||
430 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | 408 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, |
431 | sizeof(range))) | 409 | sizeof(range))) |
432 | return -EFAULT; | 410 | return -EFAULT; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 08778f6cdfe9..f8b27bf80aca 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "ext4_jbd2.h" | 24 | #include "ext4_jbd2.h" |
25 | #include "mballoc.h" | 25 | #include "mballoc.h" |
26 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
27 | #include <linux/log2.h> | ||
27 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
28 | #include <trace/events/ext4.h> | 29 | #include <trace/events/ext4.h> |
29 | 30 | ||
@@ -1338,17 +1339,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1338 | mb_check_buddy(e4b); | 1339 | mb_check_buddy(e4b); |
1339 | } | 1340 | } |
1340 | 1341 | ||
1341 | static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | 1342 | static int mb_find_extent(struct ext4_buddy *e4b, int block, |
1342 | int needed, struct ext4_free_extent *ex) | 1343 | int needed, struct ext4_free_extent *ex) |
1343 | { | 1344 | { |
1344 | int next = block; | 1345 | int next = block; |
1345 | int max; | 1346 | int max, order; |
1346 | void *buddy; | 1347 | void *buddy; |
1347 | 1348 | ||
1348 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); | 1349 | assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); |
1349 | BUG_ON(ex == NULL); | 1350 | BUG_ON(ex == NULL); |
1350 | 1351 | ||
1351 | buddy = mb_find_buddy(e4b, order, &max); | 1352 | buddy = mb_find_buddy(e4b, 0, &max); |
1352 | BUG_ON(buddy == NULL); | 1353 | BUG_ON(buddy == NULL); |
1353 | BUG_ON(block >= max); | 1354 | BUG_ON(block >= max); |
1354 | if (mb_test_bit(block, buddy)) { | 1355 | if (mb_test_bit(block, buddy)) { |
@@ -1358,12 +1359,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, | |||
1358 | return 0; | 1359 | return 0; |
1359 | } | 1360 | } |
1360 | 1361 | ||
1361 | /* FIXME dorp order completely ? */ | 1362 | /* find actual order */ |
1362 | if (likely(order == 0)) { | 1363 | order = mb_find_order_for_block(e4b, block); |
1363 | /* find actual order */ | 1364 | block = block >> order; |
1364 | order = mb_find_order_for_block(e4b, block); | ||
1365 | block = block >> order; | ||
1366 | } | ||
1367 | 1365 | ||
1368 | ex->fe_len = 1 << order; | 1366 | ex->fe_len = 1 << order; |
1369 | ex->fe_start = block << order; | 1367 | ex->fe_start = block << order; |
@@ -1549,7 +1547,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, | |||
1549 | /* recheck chunk's availability - we don't know | 1547 | /* recheck chunk's availability - we don't know |
1550 | * when it was found (within this lock-unlock | 1548 | * when it was found (within this lock-unlock |
1551 | * period or not) */ | 1549 | * period or not) */ |
1552 | max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); | 1550 | max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex); |
1553 | if (max >= gex->fe_len) { | 1551 | if (max >= gex->fe_len) { |
1554 | ext4_mb_use_best_found(ac, e4b); | 1552 | ext4_mb_use_best_found(ac, e4b); |
1555 | return; | 1553 | return; |
@@ -1641,7 +1639,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, | |||
1641 | return err; | 1639 | return err; |
1642 | 1640 | ||
1643 | ext4_lock_group(ac->ac_sb, group); | 1641 | ext4_lock_group(ac->ac_sb, group); |
1644 | max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); | 1642 | max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); |
1645 | 1643 | ||
1646 | if (max > 0) { | 1644 | if (max > 0) { |
1647 | ac->ac_b_ex = ex; | 1645 | ac->ac_b_ex = ex; |
@@ -1662,17 +1660,20 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1662 | int max; | 1660 | int max; |
1663 | int err; | 1661 | int err; |
1664 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | 1662 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); |
1663 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | ||
1665 | struct ext4_free_extent ex; | 1664 | struct ext4_free_extent ex; |
1666 | 1665 | ||
1667 | if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) | 1666 | if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) |
1668 | return 0; | 1667 | return 0; |
1668 | if (grp->bb_free == 0) | ||
1669 | return 0; | ||
1669 | 1670 | ||
1670 | err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); | 1671 | err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); |
1671 | if (err) | 1672 | if (err) |
1672 | return err; | 1673 | return err; |
1673 | 1674 | ||
1674 | ext4_lock_group(ac->ac_sb, group); | 1675 | ext4_lock_group(ac->ac_sb, group); |
1675 | max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start, | 1676 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, |
1676 | ac->ac_g_ex.fe_len, &ex); | 1677 | ac->ac_g_ex.fe_len, &ex); |
1677 | 1678 | ||
1678 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { | 1679 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { |
@@ -1788,7 +1789,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1788 | break; | 1789 | break; |
1789 | } | 1790 | } |
1790 | 1791 | ||
1791 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); | 1792 | mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); |
1792 | BUG_ON(ex.fe_len <= 0); | 1793 | BUG_ON(ex.fe_len <= 0); |
1793 | if (free < ex.fe_len) { | 1794 | if (free < ex.fe_len) { |
1794 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, | 1795 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
@@ -1840,7 +1841,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1840 | 1841 | ||
1841 | while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { | 1842 | while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { |
1842 | if (!mb_test_bit(i, bitmap)) { | 1843 | if (!mb_test_bit(i, bitmap)) { |
1843 | max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); | 1844 | max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); |
1844 | if (max >= sbi->s_stripe) { | 1845 | if (max >= sbi->s_stripe) { |
1845 | ac->ac_found++; | 1846 | ac->ac_found++; |
1846 | ac->ac_b_ex = ex; | 1847 | ac->ac_b_ex = ex; |
@@ -1862,6 +1863,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1862 | 1863 | ||
1863 | BUG_ON(cr < 0 || cr >= 4); | 1864 | BUG_ON(cr < 0 || cr >= 4); |
1864 | 1865 | ||
1866 | free = grp->bb_free; | ||
1867 | if (free == 0) | ||
1868 | return 0; | ||
1869 | if (cr <= 2 && free < ac->ac_g_ex.fe_len) | ||
1870 | return 0; | ||
1871 | |||
1865 | /* We only do this if the grp has never been initialized */ | 1872 | /* We only do this if the grp has never been initialized */ |
1866 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | 1873 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
1867 | int ret = ext4_mb_init_group(ac->ac_sb, group); | 1874 | int ret = ext4_mb_init_group(ac->ac_sb, group); |
@@ -1869,10 +1876,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1869 | return 0; | 1876 | return 0; |
1870 | } | 1877 | } |
1871 | 1878 | ||
1872 | free = grp->bb_free; | ||
1873 | fragments = grp->bb_fragments; | 1879 | fragments = grp->bb_fragments; |
1874 | if (free == 0) | ||
1875 | return 0; | ||
1876 | if (fragments == 0) | 1880 | if (fragments == 0) |
1877 | return 0; | 1881 | return 0; |
1878 | 1882 | ||
@@ -2163,6 +2167,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | |||
2163 | return cachep; | 2167 | return cachep; |
2164 | } | 2168 | } |
2165 | 2169 | ||
2170 | /* | ||
2171 | * Allocate the top-level s_group_info array for the specified number | ||
2172 | * of groups | ||
2173 | */ | ||
2174 | int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) | ||
2175 | { | ||
2176 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2177 | unsigned size; | ||
2178 | struct ext4_group_info ***new_groupinfo; | ||
2179 | |||
2180 | size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> | ||
2181 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2182 | if (size <= sbi->s_group_info_size) | ||
2183 | return 0; | ||
2184 | |||
2185 | size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); | ||
2186 | new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); | ||
2187 | if (!new_groupinfo) { | ||
2188 | ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); | ||
2189 | return -ENOMEM; | ||
2190 | } | ||
2191 | if (sbi->s_group_info) { | ||
2192 | memcpy(new_groupinfo, sbi->s_group_info, | ||
2193 | sbi->s_group_info_size * sizeof(*sbi->s_group_info)); | ||
2194 | ext4_kvfree(sbi->s_group_info); | ||
2195 | } | ||
2196 | sbi->s_group_info = new_groupinfo; | ||
2197 | sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); | ||
2198 | ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", | ||
2199 | sbi->s_group_info_size); | ||
2200 | return 0; | ||
2201 | } | ||
2202 | |||
2166 | /* Create and initialize ext4_group_info data for the given group. */ | 2203 | /* Create and initialize ext4_group_info data for the given group. */ |
2167 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2204 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2168 | struct ext4_group_desc *desc) | 2205 | struct ext4_group_desc *desc) |
@@ -2195,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2195 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2232 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2196 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2233 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2197 | 2234 | ||
2198 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); | 2235 | meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL); |
2199 | if (meta_group_info[i] == NULL) { | 2236 | if (meta_group_info[i] == NULL) { |
2200 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); | 2237 | ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); |
2201 | goto exit_group_info; | 2238 | goto exit_group_info; |
2202 | } | 2239 | } |
2203 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2204 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2240 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2205 | &(meta_group_info[i]->bb_state)); | 2241 | &(meta_group_info[i]->bb_state)); |
2206 | 2242 | ||
@@ -2252,49 +2288,14 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2252 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 2288 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
2253 | ext4_group_t i; | 2289 | ext4_group_t i; |
2254 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2290 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2255 | struct ext4_super_block *es = sbi->s_es; | 2291 | int err; |
2256 | int num_meta_group_infos; | ||
2257 | int num_meta_group_infos_max; | ||
2258 | int array_size; | ||
2259 | struct ext4_group_desc *desc; | 2292 | struct ext4_group_desc *desc; |
2260 | struct kmem_cache *cachep; | 2293 | struct kmem_cache *cachep; |
2261 | 2294 | ||
2262 | /* This is the number of blocks used by GDT */ | 2295 | err = ext4_mb_alloc_groupinfo(sb, ngroups); |
2263 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2296 | if (err) |
2264 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | 2297 | return err; |
2265 | |||
2266 | /* | ||
2267 | * This is the total number of blocks used by GDT including | ||
2268 | * the number of reserved blocks for GDT. | ||
2269 | * The s_group_info array is allocated with this value | ||
2270 | * to allow a clean online resize without a complex | ||
2271 | * manipulation of pointer. | ||
2272 | * The drawback is the unused memory when no resize | ||
2273 | * occurs but it's very low in terms of pages | ||
2274 | * (see comments below) | ||
2275 | * Need to handle this properly when META_BG resizing is allowed | ||
2276 | */ | ||
2277 | num_meta_group_infos_max = num_meta_group_infos + | ||
2278 | le16_to_cpu(es->s_reserved_gdt_blocks); | ||
2279 | 2298 | ||
2280 | /* | ||
2281 | * array_size is the size of s_group_info array. We round it | ||
2282 | * to the next power of two because this approximation is done | ||
2283 | * internally by kmalloc so we can have some more memory | ||
2284 | * for free here (e.g. may be used for META_BG resize). | ||
2285 | */ | ||
2286 | array_size = 1; | ||
2287 | while (array_size < sizeof(*sbi->s_group_info) * | ||
2288 | num_meta_group_infos_max) | ||
2289 | array_size = array_size << 1; | ||
2290 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | ||
2291 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | ||
2292 | * So a two level scheme suffices for now. */ | ||
2293 | sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); | ||
2294 | if (sbi->s_group_info == NULL) { | ||
2295 | ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); | ||
2296 | return -ENOMEM; | ||
2297 | } | ||
2298 | sbi->s_buddy_cache = new_inode(sb); | 2299 | sbi->s_buddy_cache = new_inode(sb); |
2299 | if (sbi->s_buddy_cache == NULL) { | 2300 | if (sbi->s_buddy_cache == NULL) { |
2300 | ext4_msg(sb, KERN_ERR, "can't get new inode"); | 2301 | ext4_msg(sb, KERN_ERR, "can't get new inode"); |
@@ -2322,7 +2323,7 @@ err_freebuddy: | |||
2322 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | 2323 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); |
2323 | while (i-- > 0) | 2324 | while (i-- > 0) |
2324 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); | 2325 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2325 | i = num_meta_group_infos; | 2326 | i = sbi->s_group_info_size; |
2326 | while (i-- > 0) | 2327 | while (i-- > 0) |
2327 | kfree(sbi->s_group_info[i]); | 2328 | kfree(sbi->s_group_info[i]); |
2328 | iput(sbi->s_buddy_cache); | 2329 | iput(sbi->s_buddy_cache); |
@@ -4008,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4008 | ext4_get_group_no_and_offset(sb, goal, &group, &block); | 4009 | ext4_get_group_no_and_offset(sb, goal, &group, &block); |
4009 | 4010 | ||
4010 | /* set up allocation goals */ | 4011 | /* set up allocation goals */ |
4011 | memset(ac, 0, sizeof(struct ext4_allocation_context)); | ||
4012 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); | 4012 | ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); |
4013 | ac->ac_status = AC_STATUS_CONTINUE; | 4013 | ac->ac_status = AC_STATUS_CONTINUE; |
4014 | ac->ac_sb = sb; | 4014 | ac->ac_sb = sb; |
@@ -4291,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4291 | } | 4291 | } |
4292 | } | 4292 | } |
4293 | 4293 | ||
4294 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4294 | ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS); |
4295 | if (!ac) { | 4295 | if (!ac) { |
4296 | ar->len = 0; | 4296 | ar->len = 0; |
4297 | *errp = -ENOMEM; | 4297 | *errp = -ENOMEM; |
@@ -4657,6 +4657,8 @@ do_more: | |||
4657 | * with group lock held. generate_buddy look at | 4657 | * with group lock held. generate_buddy look at |
4658 | * them with group lock_held | 4658 | * them with group lock_held |
4659 | */ | 4659 | */ |
4660 | if (test_opt(sb, DISCARD)) | ||
4661 | ext4_issue_discard(sb, block_group, bit, count); | ||
4660 | ext4_lock_group(sb, block_group); | 4662 | ext4_lock_group(sb, block_group); |
4661 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); | 4663 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
4662 | mb_free_blocks(inode, &e4b, bit, count_clusters); | 4664 | mb_free_blocks(inode, &e4b, bit, count_clusters); |
@@ -4988,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4988 | 4990 | ||
4989 | start = range->start >> sb->s_blocksize_bits; | 4991 | start = range->start >> sb->s_blocksize_bits; |
4990 | end = start + (range->len >> sb->s_blocksize_bits) - 1; | 4992 | end = start + (range->len >> sb->s_blocksize_bits) - 1; |
4991 | minlen = range->minlen >> sb->s_blocksize_bits; | 4993 | minlen = EXT4_NUM_B2C(EXT4_SB(sb), |
4994 | range->minlen >> sb->s_blocksize_bits); | ||
4992 | 4995 | ||
4993 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || | 4996 | if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || |
4994 | unlikely(start >= max_blks)) | 4997 | unlikely(start >= max_blks)) |
@@ -5048,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
5048 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); | 5051 | atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); |
5049 | 5052 | ||
5050 | out: | 5053 | out: |
5051 | range->len = trimmed * sb->s_blocksize; | 5054 | range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; |
5052 | return ret; | 5055 | return ret; |
5053 | } | 5056 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c070618c21ce..3ccd889ba953 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -65,11 +65,6 @@ extern u8 mb_enable_debug; | |||
65 | #define MB_DEFAULT_MIN_TO_SCAN 10 | 65 | #define MB_DEFAULT_MIN_TO_SCAN 10 |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * How many groups mballoc will scan looking for the best chunk | ||
69 | */ | ||
70 | #define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 | ||
71 | |||
72 | /* | ||
73 | * with 'ext4_mb_stats' allocator will collect stats that will be | 68 | * with 'ext4_mb_stats' allocator will collect stats that will be |
74 | * shown at umount. The collecting costs though! | 69 | * shown at umount. The collecting costs though! |
75 | */ | 70 | */ |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c5826c623e7a..292daeeed455 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | /** | 143 | /** |
144 | * mext_check_null_inode - NULL check for two inodes | ||
145 | * | ||
146 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
147 | */ | ||
148 | static int | ||
149 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | ||
150 | const char *function, unsigned int line) | ||
151 | { | ||
152 | int ret = 0; | ||
153 | |||
154 | if (inode1 == NULL) { | ||
155 | __ext4_error(inode2->i_sb, function, line, | ||
156 | "Both inodes should not be NULL: " | ||
157 | "inode1 NULL inode2 %lu", inode2->i_ino); | ||
158 | ret = -EIO; | ||
159 | } else if (inode2 == NULL) { | ||
160 | __ext4_error(inode1->i_sb, function, line, | ||
161 | "Both inodes should not be NULL: " | ||
162 | "inode1 %lu inode2 NULL", inode1->i_ino); | ||
163 | ret = -EIO; | ||
164 | } | ||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | /** | ||
169 | * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem | 144 | * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem |
170 | * | 145 | * |
171 | * @orig_inode: original inode structure | 146 | * Acquire write lock of i_data_sem of the two inodes |
172 | * @donor_inode: donor inode structure | ||
173 | * Acquire write lock of i_data_sem of the two inodes (orig and donor) by | ||
174 | * i_ino order. | ||
175 | */ | 147 | */ |
176 | static void | 148 | static void |
177 | double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) | 149 | double_down_write_data_sem(struct inode *first, struct inode *second) |
178 | { | 150 | { |
179 | struct inode *first = orig_inode, *second = donor_inode; | 151 | if (first < second) { |
152 | down_write(&EXT4_I(first)->i_data_sem); | ||
153 | down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); | ||
154 | } else { | ||
155 | down_write(&EXT4_I(second)->i_data_sem); | ||
156 | down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); | ||
180 | 157 | ||
181 | /* | ||
182 | * Use the inode number to provide the stable locking order instead | ||
183 | * of its address, because the C language doesn't guarantee you can | ||
184 | * compare pointers that don't come from the same array. | ||
185 | */ | ||
186 | if (donor_inode->i_ino < orig_inode->i_ino) { | ||
187 | first = donor_inode; | ||
188 | second = orig_inode; | ||
189 | } | 158 | } |
190 | |||
191 | down_write(&EXT4_I(first)->i_data_sem); | ||
192 | down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); | ||
193 | } | 159 | } |
194 | 160 | ||
195 | /** | 161 | /** |
@@ -604,9 +570,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
604 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | 570 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); |
605 | 571 | ||
606 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); | 572 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); |
607 | tmp_dext->ee_block = | 573 | le32_add_cpu(&tmp_dext->ee_block, diff); |
608 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); | 574 | le16_add_cpu(&tmp_dext->ee_len, -diff); |
609 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); | ||
610 | 575 | ||
611 | if (max_count < ext4_ext_get_actual_len(tmp_dext)) | 576 | if (max_count < ext4_ext_get_actual_len(tmp_dext)) |
612 | tmp_dext->ee_len = cpu_to_le16(max_count); | 577 | tmp_dext->ee_len = cpu_to_le16(max_count); |
@@ -629,6 +594,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
629 | } | 594 | } |
630 | 595 | ||
631 | /** | 596 | /** |
597 | * mext_check_coverage - Check that all extents in range has the same type | ||
598 | * | ||
599 | * @inode: inode in question | ||
600 | * @from: block offset of inode | ||
601 | * @count: block count to be checked | ||
602 | * @uninit: extents expected to be uninitialized | ||
603 | * @err: pointer to save error value | ||
604 | * | ||
605 | * Return 1 if all extents in range has expected type, and zero otherwise. | ||
606 | */ | ||
607 | static int | ||
608 | mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, | ||
609 | int uninit, int *err) | ||
610 | { | ||
611 | struct ext4_ext_path *path = NULL; | ||
612 | struct ext4_extent *ext; | ||
613 | ext4_lblk_t last = from + count; | ||
614 | while (from < last) { | ||
615 | *err = get_ext_path(inode, from, &path); | ||
616 | if (*err) | ||
617 | return 0; | ||
618 | ext = path[ext_depth(inode)].p_ext; | ||
619 | if (!ext) { | ||
620 | ext4_ext_drop_refs(path); | ||
621 | return 0; | ||
622 | } | ||
623 | if (uninit != ext4_ext_is_uninitialized(ext)) { | ||
624 | ext4_ext_drop_refs(path); | ||
625 | return 0; | ||
626 | } | ||
627 | from += ext4_ext_get_actual_len(ext); | ||
628 | ext4_ext_drop_refs(path); | ||
629 | } | ||
630 | return 1; | ||
631 | } | ||
632 | |||
633 | /** | ||
632 | * mext_replace_branches - Replace original extents with new extents | 634 | * mext_replace_branches - Replace original extents with new extents |
633 | * | 635 | * |
634 | * @handle: journal handle | 636 | * @handle: journal handle |
@@ -663,9 +665,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
663 | int replaced_count = 0; | 665 | int replaced_count = 0; |
664 | int dext_alen; | 666 | int dext_alen; |
665 | 667 | ||
666 | /* Protect extent trees against block allocations via delalloc */ | ||
667 | double_down_write_data_sem(orig_inode, donor_inode); | ||
668 | |||
669 | /* Get the original extent for the block "orig_off" */ | 668 | /* Get the original extent for the block "orig_off" */ |
670 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | 669 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
671 | if (*err) | 670 | if (*err) |
@@ -764,12 +763,122 @@ out: | |||
764 | ext4_ext_invalidate_cache(orig_inode); | 763 | ext4_ext_invalidate_cache(orig_inode); |
765 | ext4_ext_invalidate_cache(donor_inode); | 764 | ext4_ext_invalidate_cache(donor_inode); |
766 | 765 | ||
767 | double_up_write_data_sem(orig_inode, donor_inode); | ||
768 | |||
769 | return replaced_count; | 766 | return replaced_count; |
770 | } | 767 | } |
771 | 768 | ||
772 | /** | 769 | /** |
770 | * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 | ||
771 | * | ||
772 | * @inode1: the inode structure | ||
773 | * @inode2: the inode structure | ||
774 | * @index: page index | ||
775 | * @page: result page vector | ||
776 | * | ||
777 | * Grab two locked pages for inode's by inode order | ||
778 | */ | ||
779 | static int | ||
780 | mext_page_double_lock(struct inode *inode1, struct inode *inode2, | ||
781 | pgoff_t index, struct page *page[2]) | ||
782 | { | ||
783 | struct address_space *mapping[2]; | ||
784 | unsigned fl = AOP_FLAG_NOFS; | ||
785 | |||
786 | BUG_ON(!inode1 || !inode2); | ||
787 | if (inode1 < inode2) { | ||
788 | mapping[0] = inode1->i_mapping; | ||
789 | mapping[1] = inode2->i_mapping; | ||
790 | } else { | ||
791 | mapping[0] = inode2->i_mapping; | ||
792 | mapping[1] = inode1->i_mapping; | ||
793 | } | ||
794 | |||
795 | page[0] = grab_cache_page_write_begin(mapping[0], index, fl); | ||
796 | if (!page[0]) | ||
797 | return -ENOMEM; | ||
798 | |||
799 | page[1] = grab_cache_page_write_begin(mapping[1], index, fl); | ||
800 | if (!page[1]) { | ||
801 | unlock_page(page[0]); | ||
802 | page_cache_release(page[0]); | ||
803 | return -ENOMEM; | ||
804 | } | ||
805 | |||
806 | if (inode1 > inode2) { | ||
807 | struct page *tmp; | ||
808 | tmp = page[0]; | ||
809 | page[0] = page[1]; | ||
810 | page[1] = tmp; | ||
811 | } | ||
812 | return 0; | ||
813 | } | ||
814 | |||
815 | /* Force page buffers uptodate w/o dropping page's lock */ | ||
816 | static int | ||
817 | mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) | ||
818 | { | ||
819 | struct inode *inode = page->mapping->host; | ||
820 | sector_t block; | ||
821 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; | ||
822 | unsigned int blocksize, block_start, block_end; | ||
823 | int i, err, nr = 0, partial = 0; | ||
824 | BUG_ON(!PageLocked(page)); | ||
825 | BUG_ON(PageWriteback(page)); | ||
826 | |||
827 | if (PageUptodate(page)) | ||
828 | return 0; | ||
829 | |||
830 | blocksize = 1 << inode->i_blkbits; | ||
831 | if (!page_has_buffers(page)) | ||
832 | create_empty_buffers(page, blocksize, 0); | ||
833 | |||
834 | head = page_buffers(page); | ||
835 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
836 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
837 | block++, block_start = block_end, bh = bh->b_this_page) { | ||
838 | block_end = block_start + blocksize; | ||
839 | if (block_end <= from || block_start >= to) { | ||
840 | if (!buffer_uptodate(bh)) | ||
841 | partial = 1; | ||
842 | continue; | ||
843 | } | ||
844 | if (buffer_uptodate(bh)) | ||
845 | continue; | ||
846 | if (!buffer_mapped(bh)) { | ||
847 | int err = 0; | ||
848 | err = ext4_get_block(inode, block, bh, 0); | ||
849 | if (err) { | ||
850 | SetPageError(page); | ||
851 | return err; | ||
852 | } | ||
853 | if (!buffer_mapped(bh)) { | ||
854 | zero_user(page, block_start, blocksize); | ||
855 | if (!err) | ||
856 | set_buffer_uptodate(bh); | ||
857 | continue; | ||
858 | } | ||
859 | } | ||
860 | BUG_ON(nr >= MAX_BUF_PER_PAGE); | ||
861 | arr[nr++] = bh; | ||
862 | } | ||
863 | /* No io required */ | ||
864 | if (!nr) | ||
865 | goto out; | ||
866 | |||
867 | for (i = 0; i < nr; i++) { | ||
868 | bh = arr[i]; | ||
869 | if (!bh_uptodate_or_lock(bh)) { | ||
870 | err = bh_submit_read(bh); | ||
871 | if (err) | ||
872 | return err; | ||
873 | } | ||
874 | } | ||
875 | out: | ||
876 | if (!partial) | ||
877 | SetPageUptodate(page); | ||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | /** | ||
773 | * move_extent_per_page - Move extent data per page | 882 | * move_extent_per_page - Move extent data per page |
774 | * | 883 | * |
775 | * @o_filp: file structure of original file | 884 | * @o_filp: file structure of original file |
@@ -791,26 +900,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
791 | int block_len_in_page, int uninit, int *err) | 900 | int block_len_in_page, int uninit, int *err) |
792 | { | 901 | { |
793 | struct inode *orig_inode = o_filp->f_dentry->d_inode; | 902 | struct inode *orig_inode = o_filp->f_dentry->d_inode; |
794 | struct address_space *mapping = orig_inode->i_mapping; | 903 | struct page *pagep[2] = {NULL, NULL}; |
795 | struct buffer_head *bh; | ||
796 | struct page *page = NULL; | ||
797 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
798 | handle_t *handle; | 904 | handle_t *handle; |
799 | ext4_lblk_t orig_blk_offset; | 905 | ext4_lblk_t orig_blk_offset; |
800 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | 906 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; |
801 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 907 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
802 | unsigned int w_flags = 0; | 908 | unsigned int w_flags = 0; |
803 | unsigned int tmp_data_size, data_size, replaced_size; | 909 | unsigned int tmp_data_size, data_size, replaced_size; |
804 | void *fsdata; | 910 | int err2, jblocks, retries = 0; |
805 | int i, jblocks; | ||
806 | int err2 = 0; | ||
807 | int replaced_count = 0; | 911 | int replaced_count = 0; |
912 | int from = data_offset_in_page << orig_inode->i_blkbits; | ||
808 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 913 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
809 | 914 | ||
810 | /* | 915 | /* |
811 | * It needs twice the amount of ordinary journal buffers because | 916 | * It needs twice the amount of ordinary journal buffers because |
812 | * inode and donor_inode may change each different metadata blocks. | 917 | * inode and donor_inode may change each different metadata blocks. |
813 | */ | 918 | */ |
919 | again: | ||
920 | *err = 0; | ||
814 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; | 921 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; |
815 | handle = ext4_journal_start(orig_inode, jblocks); | 922 | handle = ext4_journal_start(orig_inode, jblocks); |
816 | if (IS_ERR(handle)) { | 923 | if (IS_ERR(handle)) { |
@@ -824,19 +931,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
824 | orig_blk_offset = orig_page_offset * blocks_per_page + | 931 | orig_blk_offset = orig_page_offset * blocks_per_page + |
825 | data_offset_in_page; | 932 | data_offset_in_page; |
826 | 933 | ||
827 | /* | ||
828 | * If orig extent is uninitialized one, | ||
829 | * it's not necessary force the page into memory | ||
830 | * and then force it to be written out again. | ||
831 | * Just swap data blocks between orig and donor. | ||
832 | */ | ||
833 | if (uninit) { | ||
834 | replaced_count = mext_replace_branches(handle, orig_inode, | ||
835 | donor_inode, orig_blk_offset, | ||
836 | block_len_in_page, err); | ||
837 | goto out2; | ||
838 | } | ||
839 | |||
840 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | 934 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; |
841 | 935 | ||
842 | /* Calculate data_size */ | 936 | /* Calculate data_size */ |
@@ -858,75 +952,120 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
858 | 952 | ||
859 | replaced_size = data_size; | 953 | replaced_size = data_size; |
860 | 954 | ||
861 | *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, | 955 | *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, |
862 | &page, &fsdata); | 956 | pagep); |
863 | if (unlikely(*err < 0)) | 957 | if (unlikely(*err < 0)) |
864 | goto out; | 958 | goto stop_journal; |
865 | |||
866 | if (!PageUptodate(page)) { | ||
867 | mapping->a_ops->readpage(o_filp, page); | ||
868 | lock_page(page); | ||
869 | } | ||
870 | |||
871 | /* | 959 | /* |
872 | * try_to_release_page() doesn't call releasepage in writeback mode. | 960 | * If orig extent was uninitialized it can become initialized |
873 | * We should care about the order of writing to the same file | 961 | * at any time after i_data_sem was dropped, in order to |
874 | * by multiple move extent processes. | 962 | * serialize with delalloc we have to recheck extent while we |
875 | * It needs to call wait_on_page_writeback() to wait for the | 963 | * hold page's lock, if it is still the case data copy is not |
876 | * writeback of the page. | 964 | * necessary, just swap data blocks between orig and donor. |
877 | */ | 965 | */ |
878 | wait_on_page_writeback(page); | 966 | if (uninit) { |
967 | double_down_write_data_sem(orig_inode, donor_inode); | ||
968 | /* If any of the extents in range became initialized we have to | ||
969 | * fall back to data copying */ | ||
970 | uninit = mext_check_coverage(orig_inode, orig_blk_offset, | ||
971 | block_len_in_page, 1, err); | ||
972 | if (*err) | ||
973 | goto drop_data_sem; | ||
879 | 974 | ||
880 | /* Release old bh and drop refs */ | 975 | uninit &= mext_check_coverage(donor_inode, orig_blk_offset, |
881 | try_to_release_page(page, 0); | 976 | block_len_in_page, 1, err); |
977 | if (*err) | ||
978 | goto drop_data_sem; | ||
979 | |||
980 | if (!uninit) { | ||
981 | double_up_write_data_sem(orig_inode, donor_inode); | ||
982 | goto data_copy; | ||
983 | } | ||
984 | if ((page_has_private(pagep[0]) && | ||
985 | !try_to_release_page(pagep[0], 0)) || | ||
986 | (page_has_private(pagep[1]) && | ||
987 | !try_to_release_page(pagep[1], 0))) { | ||
988 | *err = -EBUSY; | ||
989 | goto drop_data_sem; | ||
990 | } | ||
991 | replaced_count = mext_replace_branches(handle, orig_inode, | ||
992 | donor_inode, orig_blk_offset, | ||
993 | block_len_in_page, err); | ||
994 | drop_data_sem: | ||
995 | double_up_write_data_sem(orig_inode, donor_inode); | ||
996 | goto unlock_pages; | ||
997 | } | ||
998 | data_copy: | ||
999 | *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); | ||
1000 | if (*err) | ||
1001 | goto unlock_pages; | ||
1002 | |||
1003 | /* At this point all buffers in range are uptodate, old mapping layout | ||
1004 | * is no longer required, try to drop it now. */ | ||
1005 | if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) || | ||
1006 | (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) { | ||
1007 | *err = -EBUSY; | ||
1008 | goto unlock_pages; | ||
1009 | } | ||
882 | 1010 | ||
883 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, | 1011 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, |
884 | orig_blk_offset, block_len_in_page, | 1012 | orig_blk_offset, |
885 | &err2); | 1013 | block_len_in_page, err); |
886 | if (err2) { | 1014 | if (*err) { |
887 | if (replaced_count) { | 1015 | if (replaced_count) { |
888 | block_len_in_page = replaced_count; | 1016 | block_len_in_page = replaced_count; |
889 | replaced_size = | 1017 | replaced_size = |
890 | block_len_in_page << orig_inode->i_blkbits; | 1018 | block_len_in_page << orig_inode->i_blkbits; |
891 | } else | 1019 | } else |
892 | goto out; | 1020 | goto unlock_pages; |
893 | } | 1021 | } |
1022 | /* Perform all necessary steps similar to write_begin()/write_end() | ||
1023 | * but keeping in mind that i_size will not change */ | ||
1024 | *err = __block_write_begin(pagep[0], from, from + replaced_size, | ||
1025 | ext4_get_block); | ||
1026 | if (!*err) | ||
1027 | *err = block_commit_write(pagep[0], from, from + replaced_size); | ||
894 | 1028 | ||
895 | if (!page_has_buffers(page)) | 1029 | if (unlikely(*err < 0)) |
896 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); | 1030 | goto repair_branches; |
897 | 1031 | ||
898 | bh = page_buffers(page); | 1032 | /* Even in case of data=writeback it is reasonable to pin |
899 | for (i = 0; i < data_offset_in_page; i++) | 1033 | * inode to transaction, to prevent unexpected data loss */ |
900 | bh = bh->b_this_page; | 1034 | *err = ext4_jbd2_file_inode(handle, orig_inode); |
901 | 1035 | ||
902 | for (i = 0; i < block_len_in_page; i++) { | 1036 | unlock_pages: |
903 | *err = ext4_get_block(orig_inode, | 1037 | unlock_page(pagep[0]); |
904 | (sector_t)(orig_blk_offset + i), bh, 0); | 1038 | page_cache_release(pagep[0]); |
905 | if (*err < 0) | 1039 | unlock_page(pagep[1]); |
906 | goto out; | 1040 | page_cache_release(pagep[1]); |
907 | 1041 | stop_journal: | |
908 | if (bh->b_this_page != NULL) | ||
909 | bh = bh->b_this_page; | ||
910 | } | ||
911 | |||
912 | *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, | ||
913 | page, fsdata); | ||
914 | page = NULL; | ||
915 | |||
916 | out: | ||
917 | if (unlikely(page)) { | ||
918 | if (PageLocked(page)) | ||
919 | unlock_page(page); | ||
920 | page_cache_release(page); | ||
921 | ext4_journal_stop(handle); | ||
922 | } | ||
923 | out2: | ||
924 | ext4_journal_stop(handle); | 1042 | ext4_journal_stop(handle); |
925 | 1043 | /* Buffer was busy, probably because it is pinned to a journal transaction; |
926 | if (err2) | 1044 | * forcing a transaction commit may help to free it. */ |
927 | *err = err2; | 1045 | if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, |
928 | 1046 | &retries)) | |
1047 | goto again; | ||
929 | return replaced_count; | 1048 | return replaced_count; |
1049 | |||
1050 | repair_branches: | ||
1051 | /* | ||
1052 | * This should never ever happen! | ||
1053 | * Extents are swapped already, but we are not able to copy data. | ||
1054 | * Try to swap extents back to their original places | ||
1055 | */ | ||
1056 | double_down_write_data_sem(orig_inode, donor_inode); | ||
1057 | replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, | ||
1058 | orig_blk_offset, | ||
1059 | block_len_in_page, &err2); | ||
1060 | double_up_write_data_sem(orig_inode, donor_inode); | ||
1061 | if (replaced_count != block_len_in_page) { | ||
1062 | EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), | ||
1063 | "Unable to copy data block," | ||
1064 | " data will be lost."); | ||
1065 | *err = -EIO; | ||
1066 | } | ||
1067 | replaced_count = 0; | ||
1068 | goto unlock_pages; | ||
930 | } | 1069 | } |
931 | 1070 | ||
932 | /** | 1071 | /** |
@@ -969,14 +1108,6 @@ mext_check_arguments(struct inode *orig_inode, | |||
969 | return -EINVAL; | 1108 | return -EINVAL; |
970 | } | 1109 | } |
971 | 1110 | ||
972 | /* Files should be in the same ext4 FS */ | ||
973 | if (orig_inode->i_sb != donor_inode->i_sb) { | ||
974 | ext4_debug("ext4 move extent: The argument files " | ||
975 | "should be in same FS [ino:orig %lu, donor %lu]\n", | ||
976 | orig_inode->i_ino, donor_inode->i_ino); | ||
977 | return -EINVAL; | ||
978 | } | ||
979 | |||
980 | /* Ext4 move extent supports only extent based file */ | 1111 | /* Ext4 move extent supports only extent based file */ |
981 | if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { | 1112 | if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { |
982 | ext4_debug("ext4 move extent: orig file is not extents " | 1113 | ext4_debug("ext4 move extent: orig file is not extents " |
@@ -1002,7 +1133,6 @@ mext_check_arguments(struct inode *orig_inode, | |||
1002 | } | 1133 | } |
1003 | 1134 | ||
1004 | if ((orig_start >= EXT_MAX_BLOCKS) || | 1135 | if ((orig_start >= EXT_MAX_BLOCKS) || |
1005 | (donor_start >= EXT_MAX_BLOCKS) || | ||
1006 | (*len > EXT_MAX_BLOCKS) || | 1136 | (*len > EXT_MAX_BLOCKS) || |
1007 | (orig_start + *len >= EXT_MAX_BLOCKS)) { | 1137 | (orig_start + *len >= EXT_MAX_BLOCKS)) { |
1008 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " | 1138 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
@@ -1072,35 +1202,19 @@ mext_check_arguments(struct inode *orig_inode, | |||
1072 | * @inode1: the inode structure | 1202 | * @inode1: the inode structure |
1073 | * @inode2: the inode structure | 1203 | * @inode2: the inode structure |
1074 | * | 1204 | * |
1075 | * Lock two inodes' i_mutex by i_ino order. | 1205 | * Lock two inodes' i_mutex |
1076 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
1077 | */ | 1206 | */ |
1078 | static int | 1207 | static void |
1079 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | 1208 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) |
1080 | { | 1209 | { |
1081 | int ret = 0; | 1210 | BUG_ON(inode1 == inode2); |
1082 | 1211 | if (inode1 < inode2) { | |
1083 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
1084 | |||
1085 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); | ||
1086 | if (ret < 0) | ||
1087 | goto out; | ||
1088 | |||
1089 | if (inode1 == inode2) { | ||
1090 | mutex_lock(&inode1->i_mutex); | ||
1091 | goto out; | ||
1092 | } | ||
1093 | |||
1094 | if (inode1->i_ino < inode2->i_ino) { | ||
1095 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | 1212 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); |
1096 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | 1213 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); |
1097 | } else { | 1214 | } else { |
1098 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | 1215 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); |
1099 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | 1216 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); |
1100 | } | 1217 | } |
1101 | |||
1102 | out: | ||
1103 | return ret; | ||
1104 | } | 1218 | } |
1105 | 1219 | ||
1106 | /** | 1220 | /** |
@@ -1109,28 +1223,13 @@ out: | |||
1109 | * @inode1: the inode that is released first | 1223 | * @inode1: the inode that is released first |
1110 | * @inode2: the inode that is released second | 1224 | * @inode2: the inode that is released second |
1111 | * | 1225 | * |
1112 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
1113 | */ | 1226 | */ |
1114 | 1227 | ||
1115 | static int | 1228 | static void |
1116 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | 1229 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) |
1117 | { | 1230 | { |
1118 | int ret = 0; | 1231 | mutex_unlock(&inode1->i_mutex); |
1119 | 1232 | mutex_unlock(&inode2->i_mutex); | |
1120 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
1121 | |||
1122 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); | ||
1123 | if (ret < 0) | ||
1124 | goto out; | ||
1125 | |||
1126 | if (inode1) | ||
1127 | mutex_unlock(&inode1->i_mutex); | ||
1128 | |||
1129 | if (inode2 && inode2 != inode1) | ||
1130 | mutex_unlock(&inode2->i_mutex); | ||
1131 | |||
1132 | out: | ||
1133 | return ret; | ||
1134 | } | 1233 | } |
1135 | 1234 | ||
1136 | /** | 1235 | /** |
@@ -1187,16 +1286,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1187 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | 1286 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; |
1188 | ext4_lblk_t rest_blocks; | 1287 | ext4_lblk_t rest_blocks; |
1189 | pgoff_t orig_page_offset = 0, seq_end_page; | 1288 | pgoff_t orig_page_offset = 0, seq_end_page; |
1190 | int ret1, ret2, depth, last_extent = 0; | 1289 | int ret, depth, last_extent = 0; |
1191 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 1290 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
1192 | int data_offset_in_page; | 1291 | int data_offset_in_page; |
1193 | int block_len_in_page; | 1292 | int block_len_in_page; |
1194 | int uninit; | 1293 | int uninit; |
1195 | 1294 | ||
1196 | /* orig and donor should be different file */ | 1295 | if (orig_inode->i_sb != donor_inode->i_sb) { |
1197 | if (orig_inode->i_ino == donor_inode->i_ino) { | 1296 | ext4_debug("ext4 move extent: The argument files " |
1297 | "should be in same FS [ino:orig %lu, donor %lu]\n", | ||
1298 | orig_inode->i_ino, donor_inode->i_ino); | ||
1299 | return -EINVAL; | ||
1300 | } | ||
1301 | |||
1302 | /* orig and donor should be different inodes */ | ||
1303 | if (orig_inode == donor_inode) { | ||
1198 | ext4_debug("ext4 move extent: The argument files should not " | 1304 | ext4_debug("ext4 move extent: The argument files should not " |
1199 | "be same file [ino:orig %lu, donor %lu]\n", | 1305 | "be same inode [ino:orig %lu, donor %lu]\n", |
1200 | orig_inode->i_ino, donor_inode->i_ino); | 1306 | orig_inode->i_ino, donor_inode->i_ino); |
1201 | return -EINVAL; | 1307 | return -EINVAL; |
1202 | } | 1308 | } |
@@ -1208,18 +1314,27 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1208 | orig_inode->i_ino, donor_inode->i_ino); | 1314 | orig_inode->i_ino, donor_inode->i_ino); |
1209 | return -EINVAL; | 1315 | return -EINVAL; |
1210 | } | 1316 | } |
1211 | 1317 | /* TODO: This is a non-obvious task to swap blocks for inodes with full |
1318 | journaling enabled */ | |
1319 | if (ext4_should_journal_data(orig_inode) || | ||
1320 | ext4_should_journal_data(donor_inode)) { | ||
1321 | return -EINVAL; | ||
1322 | } | ||
1212 | /* Protect orig and donor inodes against a truncate */ | 1323 | /* Protect orig and donor inodes against a truncate */ |
1213 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); | 1324 | mext_inode_double_lock(orig_inode, donor_inode); |
1214 | if (ret1 < 0) | 1325 | |
1215 | return ret1; | 1326 | /* Wait for all existing dio workers */ |
1327 | ext4_inode_block_unlocked_dio(orig_inode); | ||
1328 | ext4_inode_block_unlocked_dio(donor_inode); | ||
1329 | inode_dio_wait(orig_inode); | ||
1330 | inode_dio_wait(donor_inode); | ||
1216 | 1331 | ||
1217 | /* Protect extent tree against block allocations via delalloc */ | 1332 | /* Protect extent tree against block allocations via delalloc */ |
1218 | double_down_write_data_sem(orig_inode, donor_inode); | 1333 | double_down_write_data_sem(orig_inode, donor_inode); |
1219 | /* Check the filesystem environment whether move_extent can be done */ | 1334 | /* Check the filesystem environment whether move_extent can be done */ |
1220 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1335 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, |
1221 | donor_start, &len); | 1336 | donor_start, &len); |
1222 | if (ret1) | 1337 | if (ret) |
1223 | goto out; | 1338 | goto out; |
1224 | 1339 | ||
1225 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 1340 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; |
@@ -1227,13 +1342,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1227 | if (file_end < block_end) | 1342 | if (file_end < block_end) |
1228 | len -= block_end - file_end; | 1343 | len -= block_end - file_end; |
1229 | 1344 | ||
1230 | ret1 = get_ext_path(orig_inode, block_start, &orig_path); | 1345 | ret = get_ext_path(orig_inode, block_start, &orig_path); |
1231 | if (ret1) | 1346 | if (ret) |
1232 | goto out; | 1347 | goto out; |
1233 | 1348 | ||
1234 | /* Get path structure to check the hole */ | 1349 | /* Get path structure to check the hole */ |
1235 | ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); | 1350 | ret = get_ext_path(orig_inode, block_start, &holecheck_path); |
1236 | if (ret1) | 1351 | if (ret) |
1237 | goto out; | 1352 | goto out; |
1238 | 1353 | ||
1239 | depth = ext_depth(orig_inode); | 1354 | depth = ext_depth(orig_inode); |
@@ -1252,13 +1367,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1252 | last_extent = mext_next_extent(orig_inode, | 1367 | last_extent = mext_next_extent(orig_inode, |
1253 | holecheck_path, &ext_cur); | 1368 | holecheck_path, &ext_cur); |
1254 | if (last_extent < 0) { | 1369 | if (last_extent < 0) { |
1255 | ret1 = last_extent; | 1370 | ret = last_extent; |
1256 | goto out; | 1371 | goto out; |
1257 | } | 1372 | } |
1258 | last_extent = mext_next_extent(orig_inode, orig_path, | 1373 | last_extent = mext_next_extent(orig_inode, orig_path, |
1259 | &ext_dummy); | 1374 | &ext_dummy); |
1260 | if (last_extent < 0) { | 1375 | if (last_extent < 0) { |
1261 | ret1 = last_extent; | 1376 | ret = last_extent; |
1262 | goto out; | 1377 | goto out; |
1263 | } | 1378 | } |
1264 | seq_start = le32_to_cpu(ext_cur->ee_block); | 1379 | seq_start = le32_to_cpu(ext_cur->ee_block); |
@@ -1272,7 +1387,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1272 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 1387 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { |
1273 | ext4_debug("ext4 move extent: The specified range of file " | 1388 | ext4_debug("ext4 move extent: The specified range of file " |
1274 | "may be the hole\n"); | 1389 | "may be the hole\n"); |
1275 | ret1 = -EINVAL; | 1390 | ret = -EINVAL; |
1276 | goto out; | 1391 | goto out; |
1277 | } | 1392 | } |
1278 | 1393 | ||
@@ -1292,7 +1407,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1292 | last_extent = mext_next_extent(orig_inode, holecheck_path, | 1407 | last_extent = mext_next_extent(orig_inode, holecheck_path, |
1293 | &ext_cur); | 1408 | &ext_cur); |
1294 | if (last_extent < 0) { | 1409 | if (last_extent < 0) { |
1295 | ret1 = last_extent; | 1410 | ret = last_extent; |
1296 | break; | 1411 | break; |
1297 | } | 1412 | } |
1298 | add_blocks = ext4_ext_get_actual_len(ext_cur); | 1413 | add_blocks = ext4_ext_get_actual_len(ext_cur); |
@@ -1349,18 +1464,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1349 | orig_page_offset, | 1464 | orig_page_offset, |
1350 | data_offset_in_page, | 1465 | data_offset_in_page, |
1351 | block_len_in_page, uninit, | 1466 | block_len_in_page, uninit, |
1352 | &ret1); | 1467 | &ret); |
1353 | 1468 | ||
1354 | /* Count how many blocks we have exchanged */ | 1469 | /* Count how many blocks we have exchanged */ |
1355 | *moved_len += block_len_in_page; | 1470 | *moved_len += block_len_in_page; |
1356 | if (ret1 < 0) | 1471 | if (ret < 0) |
1357 | break; | 1472 | break; |
1358 | if (*moved_len > len) { | 1473 | if (*moved_len > len) { |
1359 | EXT4_ERROR_INODE(orig_inode, | 1474 | EXT4_ERROR_INODE(orig_inode, |
1360 | "We replaced blocks too much! " | 1475 | "We replaced blocks too much! " |
1361 | "sum of replaced: %llu requested: %llu", | 1476 | "sum of replaced: %llu requested: %llu", |
1362 | *moved_len, len); | 1477 | *moved_len, len); |
1363 | ret1 = -EIO; | 1478 | ret = -EIO; |
1364 | break; | 1479 | break; |
1365 | } | 1480 | } |
1366 | 1481 | ||
@@ -1374,22 +1489,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1374 | } | 1489 | } |
1375 | 1490 | ||
1376 | double_down_write_data_sem(orig_inode, donor_inode); | 1491 | double_down_write_data_sem(orig_inode, donor_inode); |
1377 | if (ret1 < 0) | 1492 | if (ret < 0) |
1378 | break; | 1493 | break; |
1379 | 1494 | ||
1380 | /* Decrease buffer counter */ | 1495 | /* Decrease buffer counter */ |
1381 | if (holecheck_path) | 1496 | if (holecheck_path) |
1382 | ext4_ext_drop_refs(holecheck_path); | 1497 | ext4_ext_drop_refs(holecheck_path); |
1383 | ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); | 1498 | ret = get_ext_path(orig_inode, seq_start, &holecheck_path); |
1384 | if (ret1) | 1499 | if (ret) |
1385 | break; | 1500 | break; |
1386 | depth = holecheck_path->p_depth; | 1501 | depth = holecheck_path->p_depth; |
1387 | 1502 | ||
1388 | /* Decrease buffer counter */ | 1503 | /* Decrease buffer counter */ |
1389 | if (orig_path) | 1504 | if (orig_path) |
1390 | ext4_ext_drop_refs(orig_path); | 1505 | ext4_ext_drop_refs(orig_path); |
1391 | ret1 = get_ext_path(orig_inode, seq_start, &orig_path); | 1506 | ret = get_ext_path(orig_inode, seq_start, &orig_path); |
1392 | if (ret1) | 1507 | if (ret) |
1393 | break; | 1508 | break; |
1394 | 1509 | ||
1395 | ext_cur = holecheck_path[depth].p_ext; | 1510 | ext_cur = holecheck_path[depth].p_ext; |
@@ -1412,12 +1527,9 @@ out: | |||
1412 | kfree(holecheck_path); | 1527 | kfree(holecheck_path); |
1413 | } | 1528 | } |
1414 | double_up_write_data_sem(orig_inode, donor_inode); | 1529 | double_up_write_data_sem(orig_inode, donor_inode); |
1415 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); | 1530 | ext4_inode_resume_unlocked_dio(orig_inode); |
1416 | 1531 | ext4_inode_resume_unlocked_dio(donor_inode); | |
1417 | if (ret1) | 1532 | mext_inode_double_unlock(orig_inode, donor_inode); |
1418 | return ret1; | ||
1419 | else if (ret2) | ||
1420 | return ret2; | ||
1421 | 1533 | ||
1422 | return 0; | 1534 | return ret; |
1423 | } | 1535 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2a42cc04466f..6d600a69fc9d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
55 | { | 55 | { |
56 | struct buffer_head *bh; | 56 | struct buffer_head *bh; |
57 | 57 | ||
58 | if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && | ||
59 | ((inode->i_size >> 10) >= | ||
60 | EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) { | ||
61 | *err = -ENOSPC; | ||
62 | return NULL; | ||
63 | } | ||
64 | |||
58 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; | 65 | *block = inode->i_size >> inode->i_sb->s_blocksize_bits; |
59 | 66 | ||
60 | bh = ext4_bread(handle, inode, *block, 1, err); | 67 | bh = ext4_bread(handle, inode, *block, 1, err); |
@@ -67,6 +74,12 @@ static struct buffer_head *ext4_append(handle_t *handle, | |||
67 | bh = NULL; | 74 | bh = NULL; |
68 | } | 75 | } |
69 | } | 76 | } |
77 | if (!bh && !(*err)) { | ||
78 | *err = -EIO; | ||
79 | ext4_error(inode->i_sb, | ||
80 | "Directory hole detected on inode %lu\n", | ||
81 | inode->i_ino); | ||
82 | } | ||
70 | return bh; | 83 | return bh; |
71 | } | 84 | } |
72 | 85 | ||
@@ -594,8 +607,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
594 | u32 hash; | 607 | u32 hash; |
595 | 608 | ||
596 | frame->bh = NULL; | 609 | frame->bh = NULL; |
597 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) | 610 | if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) { |
611 | if (*err == 0) | ||
612 | *err = ERR_BAD_DX_DIR; | ||
598 | goto fail; | 613 | goto fail; |
614 | } | ||
599 | root = (struct dx_root *) bh->b_data; | 615 | root = (struct dx_root *) bh->b_data; |
600 | if (root->info.hash_version != DX_HASH_TEA && | 616 | if (root->info.hash_version != DX_HASH_TEA && |
601 | root->info.hash_version != DX_HASH_HALF_MD4 && | 617 | root->info.hash_version != DX_HASH_HALF_MD4 && |
@@ -696,8 +712,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
696 | frame->entries = entries; | 712 | frame->entries = entries; |
697 | frame->at = at; | 713 | frame->at = at; |
698 | if (!indirect--) return frame; | 714 | if (!indirect--) return frame; |
699 | if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) | 715 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) { |
716 | if (!(*err)) | ||
717 | *err = ERR_BAD_DX_DIR; | ||
700 | goto fail2; | 718 | goto fail2; |
719 | } | ||
701 | at = entries = ((struct dx_node *) bh->b_data)->entries; | 720 | at = entries = ((struct dx_node *) bh->b_data)->entries; |
702 | 721 | ||
703 | if (!buffer_verified(bh) && | 722 | if (!buffer_verified(bh) && |
@@ -807,8 +826,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
807 | */ | 826 | */ |
808 | while (num_frames--) { | 827 | while (num_frames--) { |
809 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), | 828 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), |
810 | 0, &err))) | 829 | 0, &err))) { |
830 | if (!err) { | ||
831 | ext4_error(dir->i_sb, | ||
832 | "Directory hole detected on inode %lu\n", | ||
833 | dir->i_ino); | ||
834 | return -EIO; | ||
835 | } | ||
811 | return err; /* Failure */ | 836 | return err; /* Failure */ |
837 | } | ||
812 | 838 | ||
813 | if (!buffer_verified(bh) && | 839 | if (!buffer_verified(bh) && |
814 | !ext4_dx_csum_verify(dir, | 840 | !ext4_dx_csum_verify(dir, |
@@ -839,12 +865,19 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
839 | { | 865 | { |
840 | struct buffer_head *bh; | 866 | struct buffer_head *bh; |
841 | struct ext4_dir_entry_2 *de, *top; | 867 | struct ext4_dir_entry_2 *de, *top; |
842 | int err, count = 0; | 868 | int err = 0, count = 0; |
843 | 869 | ||
844 | dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", | 870 | dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", |
845 | (unsigned long)block)); | 871 | (unsigned long)block)); |
846 | if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) | 872 | if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) { |
873 | if (!err) { | ||
874 | err = -EIO; | ||
875 | ext4_error(dir->i_sb, | ||
876 | "Directory hole detected on inode %lu\n", | ||
877 | dir->i_ino); | ||
878 | } | ||
847 | return err; | 879 | return err; |
880 | } | ||
848 | 881 | ||
849 | if (!buffer_verified(bh) && | 882 | if (!buffer_verified(bh) && |
850 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) | 883 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) |
@@ -1267,8 +1300,15 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1267 | return NULL; | 1300 | return NULL; |
1268 | do { | 1301 | do { |
1269 | block = dx_get_block(frame->at); | 1302 | block = dx_get_block(frame->at); |
1270 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) | 1303 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) { |
1304 | if (!(*err)) { | ||
1305 | *err = -EIO; | ||
1306 | ext4_error(dir->i_sb, | ||
1307 | "Directory hole detected on inode %lu\n", | ||
1308 | dir->i_ino); | ||
1309 | } | ||
1271 | goto errout; | 1310 | goto errout; |
1311 | } | ||
1272 | 1312 | ||
1273 | if (!buffer_verified(bh) && | 1313 | if (!buffer_verified(bh) && |
1274 | !ext4_dirent_csum_verify(dir, | 1314 | !ext4_dirent_csum_verify(dir, |
@@ -1801,9 +1841,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1801 | } | 1841 | } |
1802 | blocks = dir->i_size >> sb->s_blocksize_bits; | 1842 | blocks = dir->i_size >> sb->s_blocksize_bits; |
1803 | for (block = 0; block < blocks; block++) { | 1843 | for (block = 0; block < blocks; block++) { |
1804 | bh = ext4_bread(handle, dir, block, 0, &retval); | 1844 | if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) { |
1805 | if(!bh) | 1845 | if (!retval) { |
1846 | retval = -EIO; | ||
1847 | ext4_error(inode->i_sb, | ||
1848 | "Directory hole detected on inode %lu\n", | ||
1849 | inode->i_ino); | ||
1850 | } | ||
1806 | return retval; | 1851 | return retval; |
1852 | } | ||
1807 | if (!buffer_verified(bh) && | 1853 | if (!buffer_verified(bh) && |
1808 | !ext4_dirent_csum_verify(dir, | 1854 | !ext4_dirent_csum_verify(dir, |
1809 | (struct ext4_dir_entry *)bh->b_data)) | 1855 | (struct ext4_dir_entry *)bh->b_data)) |
@@ -1860,8 +1906,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1860 | entries = frame->entries; | 1906 | entries = frame->entries; |
1861 | at = frame->at; | 1907 | at = frame->at; |
1862 | 1908 | ||
1863 | if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) | 1909 | if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) { |
1910 | if (!err) { | ||
1911 | err = -EIO; | ||
1912 | ext4_error(dir->i_sb, | ||
1913 | "Directory hole detected on inode %lu\n", | ||
1914 | dir->i_ino); | ||
1915 | } | ||
1864 | goto cleanup; | 1916 | goto cleanup; |
1917 | } | ||
1865 | 1918 | ||
1866 | if (!buffer_verified(bh) && | 1919 | if (!buffer_verified(bh) && |
1867 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) | 1920 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) |
@@ -2149,9 +2202,7 @@ retry: | |||
2149 | err = PTR_ERR(inode); | 2202 | err = PTR_ERR(inode); |
2150 | if (!IS_ERR(inode)) { | 2203 | if (!IS_ERR(inode)) { |
2151 | init_special_inode(inode, inode->i_mode, rdev); | 2204 | init_special_inode(inode, inode->i_mode, rdev); |
2152 | #ifdef CONFIG_EXT4_FS_XATTR | ||
2153 | inode->i_op = &ext4_special_inode_operations; | 2205 | inode->i_op = &ext4_special_inode_operations; |
2154 | #endif | ||
2155 | err = ext4_add_nondir(handle, dentry, inode); | 2206 | err = ext4_add_nondir(handle, dentry, inode); |
2156 | } | 2207 | } |
2157 | ext4_journal_stop(handle); | 2208 | ext4_journal_stop(handle); |
@@ -2199,9 +2250,15 @@ retry: | |||
2199 | inode->i_op = &ext4_dir_inode_operations; | 2250 | inode->i_op = &ext4_dir_inode_operations; |
2200 | inode->i_fop = &ext4_dir_operations; | 2251 | inode->i_fop = &ext4_dir_operations; |
2201 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; | 2252 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; |
2202 | dir_block = ext4_bread(handle, inode, 0, 1, &err); | 2253 | if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) { |
2203 | if (!dir_block) | 2254 | if (!err) { |
2255 | err = -EIO; | ||
2256 | ext4_error(inode->i_sb, | ||
2257 | "Directory hole detected on inode %lu\n", | ||
2258 | inode->i_ino); | ||
2259 | } | ||
2204 | goto out_clear_inode; | 2260 | goto out_clear_inode; |
2261 | } | ||
2205 | BUFFER_TRACE(dir_block, "get_write_access"); | 2262 | BUFFER_TRACE(dir_block, "get_write_access"); |
2206 | err = ext4_journal_get_write_access(handle, dir_block); | 2263 | err = ext4_journal_get_write_access(handle, dir_block); |
2207 | if (err) | 2264 | if (err) |
@@ -2318,6 +2375,11 @@ static int empty_dir(struct inode *inode) | |||
2318 | EXT4_ERROR_INODE(inode, | 2375 | EXT4_ERROR_INODE(inode, |
2319 | "error %d reading directory " | 2376 | "error %d reading directory " |
2320 | "lblock %u", err, lblock); | 2377 | "lblock %u", err, lblock); |
2378 | else | ||
2379 | ext4_warning(inode->i_sb, | ||
2380 | "bad directory (dir #%lu) - no data block", | ||
2381 | inode->i_ino); | ||
2382 | |||
2321 | offset += sb->s_blocksize; | 2383 | offset += sb->s_blocksize; |
2322 | continue; | 2384 | continue; |
2323 | } | 2385 | } |
@@ -2362,7 +2424,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2362 | struct ext4_iloc iloc; | 2424 | struct ext4_iloc iloc; |
2363 | int err = 0, rc; | 2425 | int err = 0, rc; |
2364 | 2426 | ||
2365 | if (!ext4_handle_valid(handle)) | 2427 | if (!EXT4_SB(sb)->s_journal) |
2366 | return 0; | 2428 | return 0; |
2367 | 2429 | ||
2368 | mutex_lock(&EXT4_SB(sb)->s_orphan_lock); | 2430 | mutex_lock(&EXT4_SB(sb)->s_orphan_lock); |
@@ -2436,8 +2498,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2436 | struct ext4_iloc iloc; | 2498 | struct ext4_iloc iloc; |
2437 | int err = 0; | 2499 | int err = 0; |
2438 | 2500 | ||
2439 | /* ext4_handle_valid() assumes a valid handle_t pointer */ | 2501 | if (!EXT4_SB(inode->i_sb)->s_journal) |
2440 | if (handle && !ext4_handle_valid(handle)) | ||
2441 | return 0; | 2502 | return 0; |
2442 | 2503 | ||
2443 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); | 2504 | mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); |
@@ -2456,7 +2517,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2456 | * transaction handle with which to update the orphan list on | 2517 | * transaction handle with which to update the orphan list on |
2457 | * disk, but we still need to remove the inode from the linked | 2518 | * disk, but we still need to remove the inode from the linked |
2458 | * list in memory. */ | 2519 | * list in memory. */ |
2459 | if (sbi->s_journal && !handle) | 2520 | if (!handle) |
2460 | goto out; | 2521 | goto out; |
2461 | 2522 | ||
2462 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 2523 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
@@ -2826,9 +2887,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2826 | goto end_rename; | 2887 | goto end_rename; |
2827 | } | 2888 | } |
2828 | retval = -EIO; | 2889 | retval = -EIO; |
2829 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); | 2890 | if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) { |
2830 | if (!dir_bh) | 2891 | if (!retval) { |
2892 | retval = -EIO; | ||
2893 | ext4_error(old_inode->i_sb, | ||
2894 | "Directory hole detected on inode %lu\n", | ||
2895 | old_inode->i_ino); | ||
2896 | } | ||
2831 | goto end_rename; | 2897 | goto end_rename; |
2898 | } | ||
2832 | if (!buffer_verified(dir_bh) && | 2899 | if (!buffer_verified(dir_bh) && |
2833 | !ext4_dirent_csum_verify(old_inode, | 2900 | !ext4_dirent_csum_verify(old_inode, |
2834 | (struct ext4_dir_entry *)dir_bh->b_data)) | 2901 | (struct ext4_dir_entry *)dir_bh->b_data)) |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index dcdeef169a69..68e896e12a67 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -71,6 +71,9 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
71 | int i; | 71 | int i; |
72 | 72 | ||
73 | BUG_ON(!io); | 73 | BUG_ON(!io); |
74 | BUG_ON(!list_empty(&io->list)); | ||
75 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | ||
76 | |||
74 | if (io->page) | 77 | if (io->page) |
75 | put_page(io->page); | 78 | put_page(io->page); |
76 | for (i = 0; i < io->num_io_pages; i++) | 79 | for (i = 0; i < io->num_io_pages; i++) |
@@ -81,13 +84,8 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
81 | kmem_cache_free(io_end_cachep, io); | 84 | kmem_cache_free(io_end_cachep, io); |
82 | } | 85 | } |
83 | 86 | ||
84 | /* | 87 | /* check a range of space and convert unwritten extents to written. */ |
85 | * check a range of space and convert unwritten extents to written. | 88 | static int ext4_end_io(ext4_io_end_t *io) |
86 | * | ||
87 | * Called with inode->i_mutex; we depend on this when we manipulate | ||
88 | * io->flag, since we could otherwise race with ext4_flush_completed_IO() | ||
89 | */ | ||
90 | int ext4_end_io_nolock(ext4_io_end_t *io) | ||
91 | { | 89 | { |
92 | struct inode *inode = io->inode; | 90 | struct inode *inode = io->inode; |
93 | loff_t offset = io->offset; | 91 | loff_t offset = io->offset; |
@@ -106,63 +104,136 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
106 | "(inode %lu, offset %llu, size %zd, error %d)", | 104 | "(inode %lu, offset %llu, size %zd, error %d)", |
107 | inode->i_ino, offset, size, ret); | 105 | inode->i_ino, offset, size, ret); |
108 | } | 106 | } |
109 | |||
110 | if (io->iocb) | 107 | if (io->iocb) |
111 | aio_complete(io->iocb, io->result, 0); | 108 | aio_complete(io->iocb, io->result, 0); |
112 | 109 | ||
113 | if (io->flag & EXT4_IO_END_DIRECT) | 110 | if (io->flag & EXT4_IO_END_DIRECT) |
114 | inode_dio_done(inode); | 111 | inode_dio_done(inode); |
115 | /* Wake up anyone waiting on unwritten extent conversion */ | 112 | /* Wake up anyone waiting on unwritten extent conversion */ |
116 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) | 113 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) |
117 | wake_up_all(ext4_ioend_wq(io->inode)); | 114 | wake_up_all(ext4_ioend_wq(io->inode)); |
118 | return ret; | 115 | return ret; |
119 | } | 116 | } |
120 | 117 | ||
121 | /* | 118 | static void dump_completed_IO(struct inode *inode) |
122 | * work on completed aio dio IO, to convert unwritten extents to extents | 119 | { |
123 | */ | 120 | #ifdef EXT4FS_DEBUG |
124 | static void ext4_end_io_work(struct work_struct *work) | 121 | struct list_head *cur, *before, *after; |
122 | ext4_io_end_t *io, *io0, *io1; | ||
123 | unsigned long flags; | ||
124 | |||
125 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { | ||
126 | ext4_debug("inode %lu completed_io list is empty\n", | ||
127 | inode->i_ino); | ||
128 | return; | ||
129 | } | ||
130 | |||
131 | ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); | ||
132 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { | ||
133 | cur = &io->list; | ||
134 | before = cur->prev; | ||
135 | io0 = container_of(before, ext4_io_end_t, list); | ||
136 | after = cur->next; | ||
137 | io1 = container_of(after, ext4_io_end_t, list); | ||
138 | |||
139 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
140 | io, inode->i_ino, io0, io1); | ||
141 | } | ||
142 | #endif | ||
143 | } | ||
144 | |||
145 | /* Add the io_end to per-inode completed end_io list. */ | ||
146 | void ext4_add_complete_io(ext4_io_end_t *io_end) | ||
125 | { | 147 | { |
126 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 148 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); |
127 | struct inode *inode = io->inode; | 149 | struct workqueue_struct *wq; |
128 | struct ext4_inode_info *ei = EXT4_I(inode); | 150 | unsigned long flags; |
129 | unsigned long flags; | 151 | |
152 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); | ||
153 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
130 | 154 | ||
131 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 155 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
132 | if (io->flag & EXT4_IO_END_IN_FSYNC) | 156 | if (list_empty(&ei->i_completed_io_list)) { |
133 | goto requeue; | 157 | io_end->flag |= EXT4_IO_END_QUEUED; |
134 | if (list_empty(&io->list)) { | 158 | queue_work(wq, &io_end->work); |
135 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
136 | goto free; | ||
137 | } | 159 | } |
160 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
161 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
162 | } | ||
138 | 163 | ||
139 | if (!mutex_trylock(&inode->i_mutex)) { | 164 | static int ext4_do_flush_completed_IO(struct inode *inode, |
140 | bool was_queued; | 165 | ext4_io_end_t *work_io) |
141 | requeue: | 166 | { |
142 | was_queued = !!(io->flag & EXT4_IO_END_QUEUED); | 167 | ext4_io_end_t *io; |
143 | io->flag |= EXT4_IO_END_QUEUED; | 168 | struct list_head unwritten, complete, to_free; |
144 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 169 | unsigned long flags; |
145 | /* | 170 | struct ext4_inode_info *ei = EXT4_I(inode); |
146 | * Requeue the work instead of waiting so that the work | 171 | int err, ret = 0; |
147 | * items queued after this can be processed. | 172 | |
148 | */ | 173 | INIT_LIST_HEAD(&complete); |
149 | queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); | 174 | INIT_LIST_HEAD(&to_free); |
150 | /* | 175 | |
151 | * To prevent the ext4-dio-unwritten thread from keeping | 176 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
152 | * requeueing end_io requests and occupying cpu for too long, | 177 | dump_completed_IO(inode); |
153 | * yield the cpu if it sees an end_io request that has already | 178 | list_replace_init(&ei->i_completed_io_list, &unwritten); |
154 | * been requeued. | 179 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
155 | */ | 180 | |
156 | if (was_queued) | 181 | while (!list_empty(&unwritten)) { |
157 | yield(); | 182 | io = list_entry(unwritten.next, ext4_io_end_t, list); |
158 | return; | 183 | BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); |
184 | list_del_init(&io->list); | ||
185 | |||
186 | err = ext4_end_io(io); | ||
187 | if (unlikely(!ret && err)) | ||
188 | ret = err; | ||
189 | |||
190 | list_add_tail(&io->list, &complete); | ||
191 | } | ||
192 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
193 | while (!list_empty(&complete)) { | ||
194 | io = list_entry(complete.next, ext4_io_end_t, list); | ||
195 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
196 | /* end_io context can not be destroyed now because it is still | ||
197 | * used by the queued worker. The worker thread will destroy it later */ | ||
198 | if (io->flag & EXT4_IO_END_QUEUED) | ||
199 | list_del_init(&io->list); | ||
200 | else | ||
201 | list_move(&io->list, &to_free); | ||
202 | } | ||
203 | /* If we are called from worker context, it is time to clear queued | ||
204 | * flag, and destroy its end_io if it was converted already */ | ||
205 | if (work_io) { | ||
206 | work_io->flag &= ~EXT4_IO_END_QUEUED; | ||
207 | if (!(work_io->flag & EXT4_IO_END_UNWRITTEN)) | ||
208 | list_add_tail(&work_io->list, &to_free); | ||
159 | } | 209 | } |
160 | list_del_init(&io->list); | ||
161 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 210 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
162 | (void) ext4_end_io_nolock(io); | 211 | |
163 | mutex_unlock(&inode->i_mutex); | 212 | while (!list_empty(&to_free)) { |
164 | free: | 213 | io = list_entry(to_free.next, ext4_io_end_t, list); |
165 | ext4_free_io_end(io); | 214 | list_del_init(&io->list); |
215 | ext4_free_io_end(io); | ||
216 | } | ||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
222 | */ | ||
223 | static void ext4_end_io_work(struct work_struct *work) | ||
224 | { | ||
225 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
226 | ext4_do_flush_completed_IO(io->inode, io); | ||
227 | } | ||
228 | |||
229 | int ext4_flush_unwritten_io(struct inode *inode) | ||
230 | { | ||
231 | int ret; | ||
232 | WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && | ||
233 | !(inode->i_state & I_FREEING)); | ||
234 | ret = ext4_do_flush_completed_IO(inode, NULL); | ||
235 | ext4_unwritten_wait(inode); | ||
236 | return ret; | ||
166 | } | 237 | } |
167 | 238 | ||
168 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | 239 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) |
@@ -195,9 +266,7 @@ static void buffer_io_error(struct buffer_head *bh) | |||
195 | static void ext4_end_bio(struct bio *bio, int error) | 266 | static void ext4_end_bio(struct bio *bio, int error) |
196 | { | 267 | { |
197 | ext4_io_end_t *io_end = bio->bi_private; | 268 | ext4_io_end_t *io_end = bio->bi_private; |
198 | struct workqueue_struct *wq; | ||
199 | struct inode *inode; | 269 | struct inode *inode; |
200 | unsigned long flags; | ||
201 | int i; | 270 | int i; |
202 | sector_t bi_sector = bio->bi_sector; | 271 | sector_t bi_sector = bio->bi_sector; |
203 | 272 | ||
@@ -255,14 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
255 | return; | 324 | return; |
256 | } | 325 | } |
257 | 326 | ||
258 | /* Add the io_end to per-inode completed io list*/ | 327 | ext4_add_complete_io(io_end); |
259 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
260 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
261 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
262 | |||
263 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
264 | /* queue the work to convert unwritten extents to written */ | ||
265 | queue_work(wq, &io_end->work); | ||
266 | } | 328 | } |
267 | 329 | ||
268 | void ext4_io_submit(struct ext4_io_submit *io) | 330 | void ext4_io_submit(struct ext4_io_submit *io) |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 41f6ef68e2e1..7a75e1086961 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb) | |||
45 | smp_mb__after_clear_bit(); | 45 | smp_mb__after_clear_bit(); |
46 | } | 46 | } |
47 | 47 | ||
48 | static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, | ||
49 | ext4_group_t group) { | ||
50 | return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) << | ||
51 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
52 | } | ||
53 | |||
54 | static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb, | ||
55 | ext4_group_t group) { | ||
56 | group = ext4_meta_bg_first_group(sb, group); | ||
57 | return ext4_group_first_block_no(sb, group); | ||
58 | } | ||
59 | |||
60 | static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, | ||
61 | ext4_group_t group) { | ||
62 | ext4_grpblk_t overhead; | ||
63 | overhead = ext4_bg_num_gdb(sb, group); | ||
64 | if (ext4_bg_has_super(sb, group)) | ||
65 | overhead += 1 + | ||
66 | le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); | ||
67 | return overhead; | ||
68 | } | ||
69 | |||
48 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) | 70 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) |
49 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) | 71 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) |
50 | 72 | ||
@@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb, | |||
57 | ext4_fsblk_t end = start + input->blocks_count; | 79 | ext4_fsblk_t end = start + input->blocks_count; |
58 | ext4_group_t group = input->group; | 80 | ext4_group_t group = input->group; |
59 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; | 81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; |
60 | unsigned overhead = ext4_bg_has_super(sb, group) ? | 82 | unsigned overhead = ext4_group_overhead_blocks(sb, group); |
61 | (1 + ext4_bg_num_gdb(sb, group) + | ||
62 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
63 | ext4_fsblk_t metaend = start + overhead; | 83 | ext4_fsblk_t metaend = start + overhead; |
64 | struct buffer_head *bh = NULL; | 84 | struct buffer_head *bh = NULL; |
65 | ext4_grpblk_t free_blocks_count, offset; | 85 | ext4_grpblk_t free_blocks_count, offset; |
@@ -200,13 +220,15 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) | |||
200 | * be a partial of a flex group. | 220 | * be a partial of a flex group. |
201 | * | 221 | * |
202 | * @sb: super block of fs to which the groups belongs | 222 | * @sb: super block of fs to which the groups belongs |
223 | * | ||
224 | * Returns 0 on a successful allocation of the metadata blocks in the | ||
225 | * block group. | ||
203 | */ | 226 | */ |
204 | static void ext4_alloc_group_tables(struct super_block *sb, | 227 | static int ext4_alloc_group_tables(struct super_block *sb, |
205 | struct ext4_new_flex_group_data *flex_gd, | 228 | struct ext4_new_flex_group_data *flex_gd, |
206 | int flexbg_size) | 229 | int flexbg_size) |
207 | { | 230 | { |
208 | struct ext4_new_group_data *group_data = flex_gd->groups; | 231 | struct ext4_new_group_data *group_data = flex_gd->groups; |
209 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
210 | ext4_fsblk_t start_blk; | 232 | ext4_fsblk_t start_blk; |
211 | ext4_fsblk_t last_blk; | 233 | ext4_fsblk_t last_blk; |
212 | ext4_group_t src_group; | 234 | ext4_group_t src_group; |
@@ -226,23 +248,24 @@ static void ext4_alloc_group_tables(struct super_block *sb, | |||
226 | (last_group & ~(flexbg_size - 1)))); | 248 | (last_group & ~(flexbg_size - 1)))); |
227 | next_group: | 249 | next_group: |
228 | group = group_data[0].group; | 250 | group = group_data[0].group; |
251 | if (src_group >= group_data[0].group + flex_gd->count) | ||
252 | return -ENOSPC; | ||
229 | start_blk = ext4_group_first_block_no(sb, src_group); | 253 | start_blk = ext4_group_first_block_no(sb, src_group); |
230 | last_blk = start_blk + group_data[src_group - group].blocks_count; | 254 | last_blk = start_blk + group_data[src_group - group].blocks_count; |
231 | 255 | ||
232 | overhead = ext4_bg_has_super(sb, src_group) ? | 256 | overhead = ext4_group_overhead_blocks(sb, src_group); |
233 | (1 + ext4_bg_num_gdb(sb, src_group) + | ||
234 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
235 | 257 | ||
236 | start_blk += overhead; | 258 | start_blk += overhead; |
237 | 259 | ||
238 | BUG_ON(src_group >= group_data[0].group + flex_gd->count); | ||
239 | /* We collect contiguous blocks as much as possible. */ | 260 | /* We collect contiguous blocks as much as possible. */ |
240 | src_group++; | 261 | src_group++; |
241 | for (; src_group <= last_group; src_group++) | 262 | for (; src_group <= last_group; src_group++) { |
242 | if (!ext4_bg_has_super(sb, src_group)) | 263 | overhead = ext4_group_overhead_blocks(sb, src_group); |
264 | if (overhead != 0) | ||
243 | last_blk += group_data[src_group - group].blocks_count; | 265 | last_blk += group_data[src_group - group].blocks_count; |
244 | else | 266 | else |
245 | break; | 267 | break; |
268 | } | ||
246 | 269 | ||
247 | /* Allocate block bitmaps */ | 270 | /* Allocate block bitmaps */ |
248 | for (; bb_index < flex_gd->count; bb_index++) { | 271 | for (; bb_index < flex_gd->count; bb_index++) { |
@@ -300,6 +323,7 @@ next_group: | |||
300 | group_data[i].free_blocks_count); | 323 | group_data[i].free_blocks_count); |
301 | } | 324 | } |
302 | } | 325 | } |
326 | return 0; | ||
303 | } | 327 | } |
304 | 328 | ||
305 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, | 329 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, |
@@ -433,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |||
433 | ext4_group_t group, count; | 457 | ext4_group_t group, count; |
434 | struct buffer_head *bh = NULL; | 458 | struct buffer_head *bh = NULL; |
435 | int reserved_gdb, i, j, err = 0, err2; | 459 | int reserved_gdb, i, j, err = 0, err2; |
460 | int meta_bg; | ||
436 | 461 | ||
437 | BUG_ON(!flex_gd->count || !group_data || | 462 | BUG_ON(!flex_gd->count || !group_data || |
438 | group_data[0].group != sbi->s_groups_count); | 463 | group_data[0].group != sbi->s_groups_count); |
439 | 464 | ||
440 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); | 465 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); |
466 | meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); | ||
441 | 467 | ||
442 | /* This transaction may be extended/restarted along the way */ | 468 | /* This transaction may be extended/restarted along the way */ |
443 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); | 469 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); |
@@ -447,12 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |||
447 | group = group_data[0].group; | 473 | group = group_data[0].group; |
448 | for (i = 0; i < flex_gd->count; i++, group++) { | 474 | for (i = 0; i < flex_gd->count; i++, group++) { |
449 | unsigned long gdblocks; | 475 | unsigned long gdblocks; |
476 | ext4_grpblk_t overhead; | ||
450 | 477 | ||
451 | gdblocks = ext4_bg_num_gdb(sb, group); | 478 | gdblocks = ext4_bg_num_gdb(sb, group); |
452 | start = ext4_group_first_block_no(sb, group); | 479 | start = ext4_group_first_block_no(sb, group); |
453 | 480 | ||
481 | if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) | ||
482 | goto handle_itb; | ||
483 | |||
484 | if (meta_bg == 1) { | ||
485 | ext4_group_t first_group; | ||
486 | first_group = ext4_meta_bg_first_group(sb, group); | ||
487 | if (first_group != group + 1 && | ||
488 | first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) | ||
489 | goto handle_itb; | ||
490 | } | ||
491 | |||
492 | block = start + ext4_bg_has_super(sb, group); | ||
454 | /* Copy all of the GDT blocks into the backup in this group */ | 493 | /* Copy all of the GDT blocks into the backup in this group */ |
455 | for (j = 0, block = start + 1; j < gdblocks; j++, block++) { | 494 | for (j = 0; j < gdblocks; j++, block++) { |
456 | struct buffer_head *gdb; | 495 | struct buffer_head *gdb; |
457 | 496 | ||
458 | ext4_debug("update backup group %#04llx\n", block); | 497 | ext4_debug("update backup group %#04llx\n", block); |
@@ -493,6 +532,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, | |||
493 | goto out; | 532 | goto out; |
494 | } | 533 | } |
495 | 534 | ||
535 | handle_itb: | ||
496 | /* Initialize group tables of the group @group */ | 536 | /* Initialize group tables of the group @group */ |
497 | if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) | 537 | if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) |
498 | goto handle_bb; | 538 | goto handle_bb; |
@@ -521,11 +561,11 @@ handle_bb: | |||
521 | err = PTR_ERR(bh); | 561 | err = PTR_ERR(bh); |
522 | goto out; | 562 | goto out; |
523 | } | 563 | } |
524 | if (ext4_bg_has_super(sb, group)) { | 564 | overhead = ext4_group_overhead_blocks(sb, group); |
565 | if (overhead != 0) { | ||
525 | ext4_debug("mark backup superblock %#04llx (+0)\n", | 566 | ext4_debug("mark backup superblock %#04llx (+0)\n", |
526 | start); | 567 | start); |
527 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + | 568 | ext4_set_bits(bh->b_data, 0, overhead); |
528 | 1); | ||
529 | } | 569 | } |
530 | ext4_mark_bitmap_end(group_data[i].blocks_count, | 570 | ext4_mark_bitmap_end(group_data[i].blocks_count, |
531 | sb->s_blocksize * 8, bh->b_data); | 571 | sb->s_blocksize * 8, bh->b_data); |
@@ -822,6 +862,45 @@ exit_bh: | |||
822 | } | 862 | } |
823 | 863 | ||
824 | /* | 864 | /* |
865 | * add_new_gdb_meta_bg is the sister of add_new_gdb. | ||
866 | */ | ||
867 | static int add_new_gdb_meta_bg(struct super_block *sb, | ||
868 | handle_t *handle, ext4_group_t group) { | ||
869 | ext4_fsblk_t gdblock; | ||
870 | struct buffer_head *gdb_bh; | ||
871 | struct buffer_head **o_group_desc, **n_group_desc; | ||
872 | unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | ||
873 | int err; | ||
874 | |||
875 | gdblock = ext4_meta_bg_first_block_no(sb, group) + | ||
876 | ext4_bg_has_super(sb, group); | ||
877 | gdb_bh = sb_bread(sb, gdblock); | ||
878 | if (!gdb_bh) | ||
879 | return -EIO; | ||
880 | n_group_desc = ext4_kvmalloc((gdb_num + 1) * | ||
881 | sizeof(struct buffer_head *), | ||
882 | GFP_NOFS); | ||
883 | if (!n_group_desc) { | ||
884 | err = -ENOMEM; | ||
885 | ext4_warning(sb, "not enough memory for %lu groups", | ||
886 | gdb_num + 1); | ||
887 | return err; | ||
888 | } | ||
889 | |||
890 | o_group_desc = EXT4_SB(sb)->s_group_desc; | ||
891 | memcpy(n_group_desc, o_group_desc, | ||
892 | EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); | ||
893 | n_group_desc[gdb_num] = gdb_bh; | ||
894 | EXT4_SB(sb)->s_group_desc = n_group_desc; | ||
895 | EXT4_SB(sb)->s_gdb_count++; | ||
896 | ext4_kvfree(o_group_desc); | ||
897 | err = ext4_journal_get_write_access(handle, gdb_bh); | ||
898 | if (unlikely(err)) | ||
899 | brelse(gdb_bh); | ||
900 | return err; | ||
901 | } | ||
902 | |||
903 | /* | ||
825 | * Called when we are adding a new group which has a backup copy of each of | 904 | * Called when we are adding a new group which has a backup copy of each of |
826 | * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. | 905 | * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. |
827 | * We need to add these reserved backup GDT blocks to the resize inode, so | 906 | * We need to add these reserved backup GDT blocks to the resize inode, so |
@@ -949,16 +1028,16 @@ exit_free: | |||
949 | * do not copy the full number of backups at this time. The resize | 1028 | * do not copy the full number of backups at this time. The resize |
950 | * which changed s_groups_count will backup again. | 1029 | * which changed s_groups_count will backup again. |
951 | */ | 1030 | */ |
952 | static void update_backups(struct super_block *sb, | 1031 | static void update_backups(struct super_block *sb, int blk_off, char *data, |
953 | int blk_off, char *data, int size) | 1032 | int size, int meta_bg) |
954 | { | 1033 | { |
955 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1034 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
956 | const ext4_group_t last = sbi->s_groups_count; | 1035 | ext4_group_t last; |
957 | const int bpg = EXT4_BLOCKS_PER_GROUP(sb); | 1036 | const int bpg = EXT4_BLOCKS_PER_GROUP(sb); |
958 | unsigned three = 1; | 1037 | unsigned three = 1; |
959 | unsigned five = 5; | 1038 | unsigned five = 5; |
960 | unsigned seven = 7; | 1039 | unsigned seven = 7; |
961 | ext4_group_t group; | 1040 | ext4_group_t group = 0; |
962 | int rest = sb->s_blocksize - size; | 1041 | int rest = sb->s_blocksize - size; |
963 | handle_t *handle; | 1042 | handle_t *handle; |
964 | int err = 0, err2; | 1043 | int err = 0, err2; |
@@ -970,10 +1049,17 @@ static void update_backups(struct super_block *sb, | |||
970 | goto exit_err; | 1049 | goto exit_err; |
971 | } | 1050 | } |
972 | 1051 | ||
973 | ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); | 1052 | if (meta_bg == 0) { |
1053 | group = ext4_list_backups(sb, &three, &five, &seven); | ||
1054 | last = sbi->s_groups_count; | ||
1055 | } else { | ||
1056 | group = ext4_meta_bg_first_group(sb, group) + 1; | ||
1057 | last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); | ||
1058 | } | ||
974 | 1059 | ||
975 | while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { | 1060 | while (group < sbi->s_groups_count) { |
976 | struct buffer_head *bh; | 1061 | struct buffer_head *bh; |
1062 | ext4_fsblk_t backup_block; | ||
977 | 1063 | ||
978 | /* Out of journal space, and can't get more - abort - so sad */ | 1064 | /* Out of journal space, and can't get more - abort - so sad */ |
979 | if (ext4_handle_valid(handle) && | 1065 | if (ext4_handle_valid(handle) && |
@@ -982,13 +1068,20 @@ static void update_backups(struct super_block *sb, | |||
982 | (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) | 1068 | (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) |
983 | break; | 1069 | break; |
984 | 1070 | ||
985 | bh = sb_getblk(sb, group * bpg + blk_off); | 1071 | if (meta_bg == 0) |
1072 | backup_block = group * bpg + blk_off; | ||
1073 | else | ||
1074 | backup_block = (ext4_group_first_block_no(sb, group) + | ||
1075 | ext4_bg_has_super(sb, group)); | ||
1076 | |||
1077 | bh = sb_getblk(sb, backup_block); | ||
986 | if (!bh) { | 1078 | if (!bh) { |
987 | err = -EIO; | 1079 | err = -EIO; |
988 | break; | 1080 | break; |
989 | } | 1081 | } |
990 | ext4_debug("update metadata backup %#04lx\n", | 1082 | ext4_debug("update metadata backup %llu(+%llu)\n", |
991 | (unsigned long)bh->b_blocknr); | 1083 | backup_block, backup_block - |
1084 | ext4_group_first_block_no(sb, group)); | ||
992 | if ((err = ext4_journal_get_write_access(handle, bh))) | 1085 | if ((err = ext4_journal_get_write_access(handle, bh))) |
993 | break; | 1086 | break; |
994 | lock_buffer(bh); | 1087 | lock_buffer(bh); |
@@ -1001,6 +1094,13 @@ static void update_backups(struct super_block *sb, | |||
1001 | if (unlikely(err)) | 1094 | if (unlikely(err)) |
1002 | ext4_std_error(sb, err); | 1095 | ext4_std_error(sb, err); |
1003 | brelse(bh); | 1096 | brelse(bh); |
1097 | |||
1098 | if (meta_bg == 0) | ||
1099 | group = ext4_list_backups(sb, &three, &five, &seven); | ||
1100 | else if (group == last) | ||
1101 | break; | ||
1102 | else | ||
1103 | group = last; | ||
1004 | } | 1104 | } |
1005 | if ((err2 = ext4_journal_stop(handle)) && !err) | 1105 | if ((err2 = ext4_journal_stop(handle)) && !err) |
1006 | err = err2; | 1106 | err = err2; |
@@ -1043,7 +1143,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, | |||
1043 | struct ext4_super_block *es = sbi->s_es; | 1143 | struct ext4_super_block *es = sbi->s_es; |
1044 | struct buffer_head *gdb_bh; | 1144 | struct buffer_head *gdb_bh; |
1045 | int i, gdb_off, gdb_num, err = 0; | 1145 | int i, gdb_off, gdb_num, err = 0; |
1146 | int meta_bg; | ||
1046 | 1147 | ||
1148 | meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); | ||
1047 | for (i = 0; i < count; i++, group++) { | 1149 | for (i = 0; i < count; i++, group++) { |
1048 | int reserved_gdb = ext4_bg_has_super(sb, group) ? | 1150 | int reserved_gdb = ext4_bg_has_super(sb, group) ? |
1049 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1151 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
@@ -1063,8 +1165,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, | |||
1063 | 1165 | ||
1064 | if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) | 1166 | if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) |
1065 | err = reserve_backup_gdb(handle, resize_inode, group); | 1167 | err = reserve_backup_gdb(handle, resize_inode, group); |
1066 | } else | 1168 | } else if (meta_bg != 0) { |
1169 | err = add_new_gdb_meta_bg(sb, handle, group); | ||
1170 | } else { | ||
1067 | err = add_new_gdb(handle, resize_inode, group); | 1171 | err = add_new_gdb(handle, resize_inode, group); |
1172 | } | ||
1068 | if (err) | 1173 | if (err) |
1069 | break; | 1174 | break; |
1070 | } | 1175 | } |
@@ -1076,17 +1181,12 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) | |||
1076 | struct buffer_head *bh = sb_getblk(sb, block); | 1181 | struct buffer_head *bh = sb_getblk(sb, block); |
1077 | if (!bh) | 1182 | if (!bh) |
1078 | return NULL; | 1183 | return NULL; |
1079 | 1184 | if (!bh_uptodate_or_lock(bh)) { | |
1080 | if (bitmap_uptodate(bh)) | 1185 | if (bh_submit_read(bh) < 0) { |
1081 | return bh; | 1186 | brelse(bh); |
1082 | 1187 | return NULL; | |
1083 | lock_buffer(bh); | 1188 | } |
1084 | if (bh_submit_read(bh) < 0) { | ||
1085 | unlock_buffer(bh); | ||
1086 | brelse(bh); | ||
1087 | return NULL; | ||
1088 | } | 1189 | } |
1089 | unlock_buffer(bh); | ||
1090 | 1190 | ||
1091 | return bh; | 1191 | return bh; |
1092 | } | 1192 | } |
@@ -1161,6 +1261,9 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, | |||
1161 | ext4_free_group_clusters_set(sb, gdp, | 1261 | ext4_free_group_clusters_set(sb, gdp, |
1162 | EXT4_B2C(sbi, group_data->free_blocks_count)); | 1262 | EXT4_B2C(sbi, group_data->free_blocks_count)); |
1163 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | 1263 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); |
1264 | if (ext4_has_group_desc_csum(sb)) | ||
1265 | ext4_itable_unused_set(sb, gdp, | ||
1266 | EXT4_INODES_PER_GROUP(sb)); | ||
1164 | gdp->bg_flags = cpu_to_le16(*bg_flags); | 1267 | gdp->bg_flags = cpu_to_le16(*bg_flags); |
1165 | ext4_group_desc_csum_set(sb, group, gdp); | 1268 | ext4_group_desc_csum_set(sb, group, gdp); |
1166 | 1269 | ||
@@ -1216,7 +1319,7 @@ static void ext4_update_super(struct super_block *sb, | |||
1216 | } | 1319 | } |
1217 | 1320 | ||
1218 | reserved_blocks = ext4_r_blocks_count(es) * 100; | 1321 | reserved_blocks = ext4_r_blocks_count(es) * 100; |
1219 | do_div(reserved_blocks, ext4_blocks_count(es)); | 1322 | reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es)); |
1220 | reserved_blocks *= blocks_count; | 1323 | reserved_blocks *= blocks_count; |
1221 | do_div(reserved_blocks, 100); | 1324 | do_div(reserved_blocks, 100); |
1222 | 1325 | ||
@@ -1227,6 +1330,7 @@ static void ext4_update_super(struct super_block *sb, | |||
1227 | le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * | 1330 | le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * |
1228 | flex_gd->count); | 1331 | flex_gd->count); |
1229 | 1332 | ||
1333 | ext4_debug("free blocks count %llu", ext4_free_blocks_count(es)); | ||
1230 | /* | 1334 | /* |
1231 | * We need to protect s_groups_count against other CPUs seeing | 1335 | * We need to protect s_groups_count against other CPUs seeing |
1232 | * inconsistent state in the superblock. | 1336 | * inconsistent state in the superblock. |
@@ -1261,6 +1365,8 @@ static void ext4_update_super(struct super_block *sb, | |||
1261 | percpu_counter_add(&sbi->s_freeinodes_counter, | 1365 | percpu_counter_add(&sbi->s_freeinodes_counter, |
1262 | EXT4_INODES_PER_GROUP(sb) * flex_gd->count); | 1366 | EXT4_INODES_PER_GROUP(sb) * flex_gd->count); |
1263 | 1367 | ||
1368 | ext4_debug("free blocks count %llu", | ||
1369 | percpu_counter_read(&sbi->s_freeclusters_counter)); | ||
1264 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | 1370 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, |
1265 | EXT4_FEATURE_INCOMPAT_FLEX_BG) && | 1371 | EXT4_FEATURE_INCOMPAT_FLEX_BG) && |
1266 | sbi->s_log_groups_per_flex) { | 1372 | sbi->s_log_groups_per_flex) { |
@@ -1349,16 +1455,24 @@ exit_journal: | |||
1349 | err = err2; | 1455 | err = err2; |
1350 | 1456 | ||
1351 | if (!err) { | 1457 | if (!err) { |
1352 | int i; | 1458 | int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); |
1459 | int gdb_num_end = ((group + flex_gd->count - 1) / | ||
1460 | EXT4_DESC_PER_BLOCK(sb)); | ||
1461 | int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
1462 | EXT4_FEATURE_INCOMPAT_META_BG); | ||
1463 | sector_t old_gdb = 0; | ||
1464 | |||
1353 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | 1465 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, |
1354 | sizeof(struct ext4_super_block)); | 1466 | sizeof(struct ext4_super_block), 0); |
1355 | for (i = 0; i < flex_gd->count; i++, group++) { | 1467 | for (; gdb_num <= gdb_num_end; gdb_num++) { |
1356 | struct buffer_head *gdb_bh; | 1468 | struct buffer_head *gdb_bh; |
1357 | int gdb_num; | 1469 | |
1358 | gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); | ||
1359 | gdb_bh = sbi->s_group_desc[gdb_num]; | 1470 | gdb_bh = sbi->s_group_desc[gdb_num]; |
1471 | if (old_gdb == gdb_bh->b_blocknr) | ||
1472 | continue; | ||
1360 | update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, | 1473 | update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, |
1361 | gdb_bh->b_size); | 1474 | gdb_bh->b_size, meta_bg); |
1475 | old_gdb = gdb_bh->b_blocknr; | ||
1362 | } | 1476 | } |
1363 | } | 1477 | } |
1364 | exit: | 1478 | exit: |
@@ -1402,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, | |||
1402 | 1516 | ||
1403 | group_data[i].group = group + i; | 1517 | group_data[i].group = group + i; |
1404 | group_data[i].blocks_count = blocks_per_group; | 1518 | group_data[i].blocks_count = blocks_per_group; |
1405 | overhead = ext4_bg_has_super(sb, group + i) ? | 1519 | overhead = ext4_group_overhead_blocks(sb, group + i); |
1406 | (1 + ext4_bg_num_gdb(sb, group + i) + | ||
1407 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
1408 | group_data[i].free_blocks_count = blocks_per_group - overhead; | 1520 | group_data[i].free_blocks_count = blocks_per_group - overhead; |
1409 | if (ext4_has_group_desc_csum(sb)) | 1521 | if (ext4_has_group_desc_csum(sb)) |
1410 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | | 1522 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | |
@@ -1492,6 +1604,14 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
1492 | if (err) | 1604 | if (err) |
1493 | goto out; | 1605 | goto out; |
1494 | 1606 | ||
1607 | err = ext4_alloc_flex_bg_array(sb, input->group + 1); | ||
1608 | if (err) | ||
1609 | return err; | ||
1610 | |||
1611 | err = ext4_mb_alloc_groupinfo(sb, input->group + 1); | ||
1612 | if (err) | ||
1613 | goto out; | ||
1614 | |||
1495 | flex_gd.count = 1; | 1615 | flex_gd.count = 1; |
1496 | flex_gd.groups = input; | 1616 | flex_gd.groups = input; |
1497 | flex_gd.bg_flags = &bg_flags; | 1617 | flex_gd.bg_flags = &bg_flags; |
@@ -1544,11 +1664,13 @@ errout: | |||
1544 | err = err2; | 1664 | err = err2; |
1545 | 1665 | ||
1546 | if (!err) { | 1666 | if (!err) { |
1667 | ext4_fsblk_t first_block; | ||
1668 | first_block = ext4_group_first_block_no(sb, 0); | ||
1547 | if (test_opt(sb, DEBUG)) | 1669 | if (test_opt(sb, DEBUG)) |
1548 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | 1670 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " |
1549 | "blocks\n", ext4_blocks_count(es)); | 1671 | "blocks\n", ext4_blocks_count(es)); |
1550 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, | 1672 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, |
1551 | sizeof(struct ext4_super_block)); | 1673 | (char *)es, sizeof(struct ext4_super_block), 0); |
1552 | } | 1674 | } |
1553 | return err; | 1675 | return err; |
1554 | } | 1676 | } |
@@ -1631,6 +1753,94 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1631 | return err; | 1753 | return err; |
1632 | } /* ext4_group_extend */ | 1754 | } /* ext4_group_extend */ |
1633 | 1755 | ||
1756 | |||
1757 | static int num_desc_blocks(struct super_block *sb, ext4_group_t groups) | ||
1758 | { | ||
1759 | return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); | ||
1760 | } | ||
1761 | |||
1762 | /* | ||
1763 | * Release the resize inode and drop the resize_inode feature if there | ||
1764 | * are no more reserved gdt blocks, and then convert the file system | ||
1765 | * to enable meta_bg | ||
1766 | */ | ||
1767 | static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) | ||
1768 | { | ||
1769 | handle_t *handle; | ||
1770 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1771 | struct ext4_super_block *es = sbi->s_es; | ||
1772 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1773 | ext4_fsblk_t nr; | ||
1774 | int i, ret, err = 0; | ||
1775 | int credits = 1; | ||
1776 | |||
1777 | ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); | ||
1778 | if (inode) { | ||
1779 | if (es->s_reserved_gdt_blocks) { | ||
1780 | ext4_error(sb, "Unexpected non-zero " | ||
1781 | "s_reserved_gdt_blocks"); | ||
1782 | return -EPERM; | ||
1783 | } | ||
1784 | |||
1785 | /* Do a quick sanity check of the resize inode */ | ||
1786 | if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) | ||
1787 | goto invalid_resize_inode; | ||
1788 | for (i = 0; i < EXT4_N_BLOCKS; i++) { | ||
1789 | if (i == EXT4_DIND_BLOCK) { | ||
1790 | if (ei->i_data[i]) | ||
1791 | continue; | ||
1792 | else | ||
1793 | goto invalid_resize_inode; | ||
1794 | } | ||
1795 | if (ei->i_data[i]) | ||
1796 | goto invalid_resize_inode; | ||
1797 | } | ||
1798 | credits += 3; /* block bitmap, bg descriptor, resize inode */ | ||
1799 | } | ||
1800 | |||
1801 | handle = ext4_journal_start_sb(sb, credits); | ||
1802 | if (IS_ERR(handle)) | ||
1803 | return PTR_ERR(handle); | ||
1804 | |||
1805 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); | ||
1806 | if (err) | ||
1807 | goto errout; | ||
1808 | |||
1809 | EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE); | ||
1810 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); | ||
1811 | sbi->s_es->s_first_meta_bg = | ||
1812 | cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); | ||
1813 | |||
1814 | err = ext4_handle_dirty_super(handle, sb); | ||
1815 | if (err) { | ||
1816 | ext4_std_error(sb, err); | ||
1817 | goto errout; | ||
1818 | } | ||
1819 | |||
1820 | if (inode) { | ||
1821 | nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]); | ||
1822 | ext4_free_blocks(handle, inode, NULL, nr, 1, | ||
1823 | EXT4_FREE_BLOCKS_METADATA | | ||
1824 | EXT4_FREE_BLOCKS_FORGET); | ||
1825 | ei->i_data[EXT4_DIND_BLOCK] = 0; | ||
1826 | inode->i_blocks = 0; | ||
1827 | |||
1828 | err = ext4_mark_inode_dirty(handle, inode); | ||
1829 | if (err) | ||
1830 | ext4_std_error(sb, err); | ||
1831 | } | ||
1832 | |||
1833 | errout: | ||
1834 | ret = ext4_journal_stop(handle); | ||
1835 | if (!err) | ||
1836 | err = ret; | ||
1837 | return ret; | ||
1838 | |||
1839 | invalid_resize_inode: | ||
1840 | ext4_error(sb, "corrupted/inconsistent resize inode"); | ||
1841 | return -EINVAL; | ||
1842 | } | ||
1843 | |||
1634 | /* | 1844 | /* |
1635 | * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count | 1845 | * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count |
1636 | * | 1846 | * |
@@ -1643,21 +1853,31 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1643 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1853 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1644 | struct ext4_super_block *es = sbi->s_es; | 1854 | struct ext4_super_block *es = sbi->s_es; |
1645 | struct buffer_head *bh; | 1855 | struct buffer_head *bh; |
1646 | struct inode *resize_inode; | 1856 | struct inode *resize_inode = NULL; |
1647 | ext4_fsblk_t o_blocks_count; | 1857 | ext4_grpblk_t add, offset; |
1648 | ext4_group_t o_group; | ||
1649 | ext4_group_t n_group; | ||
1650 | ext4_grpblk_t offset, add; | ||
1651 | unsigned long n_desc_blocks; | 1858 | unsigned long n_desc_blocks; |
1652 | unsigned long o_desc_blocks; | 1859 | unsigned long o_desc_blocks; |
1653 | unsigned long desc_blocks; | 1860 | ext4_group_t o_group; |
1654 | int err = 0, flexbg_size = 1; | 1861 | ext4_group_t n_group; |
1862 | ext4_fsblk_t o_blocks_count; | ||
1863 | ext4_fsblk_t n_blocks_count_retry = 0; | ||
1864 | unsigned long last_update_time = 0; | ||
1865 | int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; | ||
1866 | int meta_bg; | ||
1655 | 1867 | ||
1868 | /* See if the device is actually as big as what was requested */ | ||
1869 | bh = sb_bread(sb, n_blocks_count - 1); | ||
1870 | if (!bh) { | ||
1871 | ext4_warning(sb, "can't read last block, resize aborted"); | ||
1872 | return -ENOSPC; | ||
1873 | } | ||
1874 | brelse(bh); | ||
1875 | |||
1876 | retry: | ||
1656 | o_blocks_count = ext4_blocks_count(es); | 1877 | o_blocks_count = ext4_blocks_count(es); |
1657 | 1878 | ||
1658 | if (test_opt(sb, DEBUG)) | 1879 | ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu " |
1659 | ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " | 1880 | "to %llu blocks", o_blocks_count, n_blocks_count); |
1660 | "to %llu blocks", o_blocks_count, n_blocks_count); | ||
1661 | 1881 | ||
1662 | if (n_blocks_count < o_blocks_count) { | 1882 | if (n_blocks_count < o_blocks_count) { |
1663 | /* On-line shrinking not supported */ | 1883 | /* On-line shrinking not supported */ |
@@ -1672,32 +1892,49 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1672 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); | 1892 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); |
1673 | ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); | 1893 | ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); |
1674 | 1894 | ||
1675 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / | 1895 | n_desc_blocks = num_desc_blocks(sb, n_group + 1); |
1676 | EXT4_DESC_PER_BLOCK(sb); | 1896 | o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count); |
1677 | o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | ||
1678 | EXT4_DESC_PER_BLOCK(sb); | ||
1679 | desc_blocks = n_desc_blocks - o_desc_blocks; | ||
1680 | 1897 | ||
1681 | if (desc_blocks && | 1898 | meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); |
1682 | (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || | ||
1683 | le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { | ||
1684 | ext4_warning(sb, "No reserved GDT blocks, can't resize"); | ||
1685 | return -EPERM; | ||
1686 | } | ||
1687 | 1899 | ||
1688 | resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); | 1900 | if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { |
1689 | if (IS_ERR(resize_inode)) { | 1901 | if (meta_bg) { |
1690 | ext4_warning(sb, "Error opening resize inode"); | 1902 | ext4_error(sb, "resize_inode and meta_bg enabled " |
1691 | return PTR_ERR(resize_inode); | 1903 | "simultaneously"); |
1904 | return -EINVAL; | ||
1905 | } | ||
1906 | if (n_desc_blocks > o_desc_blocks + | ||
1907 | le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
1908 | n_blocks_count_retry = n_blocks_count; | ||
1909 | n_desc_blocks = o_desc_blocks + | ||
1910 | le16_to_cpu(es->s_reserved_gdt_blocks); | ||
1911 | n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); | ||
1912 | n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); | ||
1913 | n_group--; /* set to last group number */ | ||
1914 | } | ||
1915 | |||
1916 | if (!resize_inode) | ||
1917 | resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); | ||
1918 | if (IS_ERR(resize_inode)) { | ||
1919 | ext4_warning(sb, "Error opening resize inode"); | ||
1920 | return PTR_ERR(resize_inode); | ||
1921 | } | ||
1692 | } | 1922 | } |
1693 | 1923 | ||
1694 | /* See if the device is actually as big as what was requested */ | 1924 | if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) { |
1695 | bh = sb_bread(sb, n_blocks_count - 1); | 1925 | err = ext4_convert_meta_bg(sb, resize_inode); |
1696 | if (!bh) { | 1926 | if (err) |
1697 | ext4_warning(sb, "can't read last block, resize aborted"); | 1927 | goto out; |
1698 | return -ENOSPC; | 1928 | if (resize_inode) { |
1929 | iput(resize_inode); | ||
1930 | resize_inode = NULL; | ||
1931 | } | ||
1932 | if (n_blocks_count_retry) { | ||
1933 | n_blocks_count = n_blocks_count_retry; | ||
1934 | n_blocks_count_retry = 0; | ||
1935 | goto retry; | ||
1936 | } | ||
1699 | } | 1937 | } |
1700 | brelse(bh); | ||
1701 | 1938 | ||
1702 | /* extend the last group */ | 1939 | /* extend the last group */ |
1703 | if (n_group == o_group) | 1940 | if (n_group == o_group) |
@@ -1710,12 +1947,15 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1710 | goto out; | 1947 | goto out; |
1711 | } | 1948 | } |
1712 | 1949 | ||
1713 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && | 1950 | if (ext4_blocks_count(es) == n_blocks_count) |
1714 | es->s_log_groups_per_flex) | 1951 | goto out; |
1715 | flexbg_size = 1 << es->s_log_groups_per_flex; | ||
1716 | 1952 | ||
1717 | o_blocks_count = ext4_blocks_count(es); | 1953 | err = ext4_alloc_flex_bg_array(sb, n_group + 1); |
1718 | if (o_blocks_count == n_blocks_count) | 1954 | if (err) |
1955 | return err; | ||
1956 | |||
1957 | err = ext4_mb_alloc_groupinfo(sb, n_group + 1); | ||
1958 | if (err) | ||
1719 | goto out; | 1959 | goto out; |
1720 | 1960 | ||
1721 | flex_gd = alloc_flex_gd(flexbg_size); | 1961 | flex_gd = alloc_flex_gd(flexbg_size); |
@@ -1729,19 +1969,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | |||
1729 | */ | 1969 | */ |
1730 | while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, | 1970 | while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, |
1731 | flexbg_size)) { | 1971 | flexbg_size)) { |
1732 | ext4_alloc_group_tables(sb, flex_gd, flexbg_size); | 1972 | if (jiffies - last_update_time > HZ * 10) { |
1973 | if (last_update_time) | ||
1974 | ext4_msg(sb, KERN_INFO, | ||
1975 | "resized to %llu blocks", | ||
1976 | ext4_blocks_count(es)); | ||
1977 | last_update_time = jiffies; | ||
1978 | } | ||
1979 | if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) | ||
1980 | break; | ||
1733 | err = ext4_flex_group_add(sb, resize_inode, flex_gd); | 1981 | err = ext4_flex_group_add(sb, resize_inode, flex_gd); |
1734 | if (unlikely(err)) | 1982 | if (unlikely(err)) |
1735 | break; | 1983 | break; |
1736 | } | 1984 | } |
1737 | 1985 | ||
1986 | if (!err && n_blocks_count_retry) { | ||
1987 | n_blocks_count = n_blocks_count_retry; | ||
1988 | n_blocks_count_retry = 0; | ||
1989 | free_flex_gd(flex_gd); | ||
1990 | flex_gd = NULL; | ||
1991 | goto retry; | ||
1992 | } | ||
1993 | |||
1738 | out: | 1994 | out: |
1739 | if (flex_gd) | 1995 | if (flex_gd) |
1740 | free_flex_gd(flex_gd); | 1996 | free_flex_gd(flex_gd); |
1741 | 1997 | if (resize_inode != NULL) | |
1742 | iput(resize_inode); | 1998 | iput(resize_inode); |
1743 | if (test_opt(sb, DEBUG)) | 1999 | ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); |
1744 | ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " | ||
1745 | "upto %llu blocks", o_blocks_count, n_blocks_count); | ||
1746 | return err; | 2000 | return err; |
1747 | } | 2001 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 69c55d4e4626..7265a0367476 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -420,7 +420,7 @@ static void __save_error_info(struct super_block *sb, const char *func, | |||
420 | */ | 420 | */ |
421 | if (!es->s_error_count) | 421 | if (!es->s_error_count) |
422 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); | 422 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); |
423 | es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); | 423 | le32_add_cpu(&es->s_error_count, 1); |
424 | } | 424 | } |
425 | 425 | ||
426 | static void save_error_info(struct super_block *sb, const char *func, | 426 | static void save_error_info(struct super_block *sb, const char *func, |
@@ -850,7 +850,6 @@ static void ext4_put_super(struct super_block *sb) | |||
850 | flush_workqueue(sbi->dio_unwritten_wq); | 850 | flush_workqueue(sbi->dio_unwritten_wq); |
851 | destroy_workqueue(sbi->dio_unwritten_wq); | 851 | destroy_workqueue(sbi->dio_unwritten_wq); |
852 | 852 | ||
853 | lock_super(sb); | ||
854 | if (sbi->s_journal) { | 853 | if (sbi->s_journal) { |
855 | err = jbd2_journal_destroy(sbi->s_journal); | 854 | err = jbd2_journal_destroy(sbi->s_journal); |
856 | sbi->s_journal = NULL; | 855 | sbi->s_journal = NULL; |
@@ -917,7 +916,6 @@ static void ext4_put_super(struct super_block *sb) | |||
917 | * Now that we are completely done shutting down the | 916 | * Now that we are completely done shutting down the |
918 | * superblock, we need to actually destroy the kobject. | 917 | * superblock, we need to actually destroy the kobject. |
919 | */ | 918 | */ |
920 | unlock_super(sb); | ||
921 | kobject_put(&sbi->s_kobj); | 919 | kobject_put(&sbi->s_kobj); |
922 | wait_for_completion(&sbi->s_kobj_unregister); | 920 | wait_for_completion(&sbi->s_kobj_unregister); |
923 | if (sbi->s_chksum_driver) | 921 | if (sbi->s_chksum_driver) |
@@ -956,11 +954,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
956 | ei->jinode = NULL; | 954 | ei->jinode = NULL; |
957 | INIT_LIST_HEAD(&ei->i_completed_io_list); | 955 | INIT_LIST_HEAD(&ei->i_completed_io_list); |
958 | spin_lock_init(&ei->i_completed_io_lock); | 956 | spin_lock_init(&ei->i_completed_io_lock); |
959 | ei->cur_aio_dio = NULL; | ||
960 | ei->i_sync_tid = 0; | 957 | ei->i_sync_tid = 0; |
961 | ei->i_datasync_tid = 0; | 958 | ei->i_datasync_tid = 0; |
962 | atomic_set(&ei->i_ioend_count, 0); | 959 | atomic_set(&ei->i_ioend_count, 0); |
963 | atomic_set(&ei->i_aiodio_unwritten, 0); | 960 | atomic_set(&ei->i_unwritten, 0); |
964 | 961 | ||
965 | return &ei->vfs_inode; | 962 | return &ei->vfs_inode; |
966 | } | 963 | } |
@@ -1224,6 +1221,7 @@ enum { | |||
1224 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1221 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1225 | Opt_dioread_nolock, Opt_dioread_lock, | 1222 | Opt_dioread_nolock, Opt_dioread_lock, |
1226 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, | 1223 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, |
1224 | Opt_max_dir_size_kb, | ||
1227 | }; | 1225 | }; |
1228 | 1226 | ||
1229 | static const match_table_t tokens = { | 1227 | static const match_table_t tokens = { |
@@ -1297,6 +1295,7 @@ static const match_table_t tokens = { | |||
1297 | {Opt_init_itable, "init_itable=%u"}, | 1295 | {Opt_init_itable, "init_itable=%u"}, |
1298 | {Opt_init_itable, "init_itable"}, | 1296 | {Opt_init_itable, "init_itable"}, |
1299 | {Opt_noinit_itable, "noinit_itable"}, | 1297 | {Opt_noinit_itable, "noinit_itable"}, |
1298 | {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, | ||
1300 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ | 1299 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ |
1301 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ | 1300 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ |
1302 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ | 1301 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ |
@@ -1477,6 +1476,7 @@ static const struct mount_opts { | |||
1477 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, | 1476 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, |
1478 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, | 1477 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, |
1479 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, | 1478 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, |
1479 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, | ||
1480 | {Opt_err, 0, 0} | 1480 | {Opt_err, 0, 0} |
1481 | }; | 1481 | }; |
1482 | 1482 | ||
@@ -1592,6 +1592,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1592 | if (!args->from) | 1592 | if (!args->from) |
1593 | arg = EXT4_DEF_LI_WAIT_MULT; | 1593 | arg = EXT4_DEF_LI_WAIT_MULT; |
1594 | sbi->s_li_wait_mult = arg; | 1594 | sbi->s_li_wait_mult = arg; |
1595 | } else if (token == Opt_max_dir_size_kb) { | ||
1596 | sbi->s_max_dir_size_kb = arg; | ||
1595 | } else if (token == Opt_stripe) { | 1597 | } else if (token == Opt_stripe) { |
1596 | sbi->s_stripe = arg; | 1598 | sbi->s_stripe = arg; |
1597 | } else if (m->flags & MOPT_DATAJ) { | 1599 | } else if (m->flags & MOPT_DATAJ) { |
@@ -1664,7 +1666,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1664 | * Initialize args struct so we know whether arg was | 1666 | * Initialize args struct so we know whether arg was |
1665 | * found; some options take optional arguments. | 1667 | * found; some options take optional arguments. |
1666 | */ | 1668 | */ |
1667 | args[0].to = args[0].from = 0; | 1669 | args[0].to = args[0].from = NULL; |
1668 | token = match_token(p, tokens, args); | 1670 | token = match_token(p, tokens, args); |
1669 | if (handle_mount_opt(sb, p, token, args, journal_devnum, | 1671 | if (handle_mount_opt(sb, p, token, args, journal_devnum, |
1670 | journal_ioprio, is_remount) < 0) | 1672 | journal_ioprio, is_remount) < 0) |
@@ -1740,7 +1742,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
1740 | 1742 | ||
1741 | static const char *token2str(int token) | 1743 | static const char *token2str(int token) |
1742 | { | 1744 | { |
1743 | static const struct match_token *t; | 1745 | const struct match_token *t; |
1744 | 1746 | ||
1745 | for (t = tokens; t->token != Opt_err; t++) | 1747 | for (t = tokens; t->token != Opt_err; t++) |
1746 | if (t->token == token && !strchr(t->pattern, '=')) | 1748 | if (t->token == token && !strchr(t->pattern, '=')) |
@@ -1823,6 +1825,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, | |||
1823 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && | 1825 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && |
1824 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) | 1826 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) |
1825 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); | 1827 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); |
1828 | if (nodefs || sbi->s_max_dir_size_kb) | ||
1829 | SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); | ||
1826 | 1830 | ||
1827 | ext4_show_quota_options(seq, sb); | 1831 | ext4_show_quota_options(seq, sb); |
1828 | return 0; | 1832 | return 0; |
@@ -1914,15 +1918,45 @@ done: | |||
1914 | return res; | 1918 | return res; |
1915 | } | 1919 | } |
1916 | 1920 | ||
1921 | int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) | ||
1922 | { | ||
1923 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1924 | struct flex_groups *new_groups; | ||
1925 | int size; | ||
1926 | |||
1927 | if (!sbi->s_log_groups_per_flex) | ||
1928 | return 0; | ||
1929 | |||
1930 | size = ext4_flex_group(sbi, ngroup - 1) + 1; | ||
1931 | if (size <= sbi->s_flex_groups_allocated) | ||
1932 | return 0; | ||
1933 | |||
1934 | size = roundup_pow_of_two(size * sizeof(struct flex_groups)); | ||
1935 | new_groups = ext4_kvzalloc(size, GFP_KERNEL); | ||
1936 | if (!new_groups) { | ||
1937 | ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", | ||
1938 | size / (int) sizeof(struct flex_groups)); | ||
1939 | return -ENOMEM; | ||
1940 | } | ||
1941 | |||
1942 | if (sbi->s_flex_groups) { | ||
1943 | memcpy(new_groups, sbi->s_flex_groups, | ||
1944 | (sbi->s_flex_groups_allocated * | ||
1945 | sizeof(struct flex_groups))); | ||
1946 | ext4_kvfree(sbi->s_flex_groups); | ||
1947 | } | ||
1948 | sbi->s_flex_groups = new_groups; | ||
1949 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); | ||
1950 | return 0; | ||
1951 | } | ||
1952 | |||
1917 | static int ext4_fill_flex_info(struct super_block *sb) | 1953 | static int ext4_fill_flex_info(struct super_block *sb) |
1918 | { | 1954 | { |
1919 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1955 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1920 | struct ext4_group_desc *gdp = NULL; | 1956 | struct ext4_group_desc *gdp = NULL; |
1921 | ext4_group_t flex_group_count; | ||
1922 | ext4_group_t flex_group; | 1957 | ext4_group_t flex_group; |
1923 | unsigned int groups_per_flex = 0; | 1958 | unsigned int groups_per_flex = 0; |
1924 | size_t size; | 1959 | int i, err; |
1925 | int i; | ||
1926 | 1960 | ||
1927 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1961 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1928 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { | 1962 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { |
@@ -1931,17 +1965,9 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1931 | } | 1965 | } |
1932 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 1966 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; |
1933 | 1967 | ||
1934 | /* We allocate both existing and potentially added groups */ | 1968 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); |
1935 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + | 1969 | if (err) |
1936 | ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << | ||
1937 | EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; | ||
1938 | size = flex_group_count * sizeof(struct flex_groups); | ||
1939 | sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); | ||
1940 | if (sbi->s_flex_groups == NULL) { | ||
1941 | ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", | ||
1942 | flex_group_count); | ||
1943 | goto failed; | 1970 | goto failed; |
1944 | } | ||
1945 | 1971 | ||
1946 | for (i = 0; i < sbi->s_groups_count; i++) { | 1972 | for (i = 0; i < sbi->s_groups_count; i++) { |
1947 | gdp = ext4_get_group_desc(sb, i, NULL); | 1973 | gdp = ext4_get_group_desc(sb, i, NULL); |
@@ -2144,10 +2170,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2144 | } | 2170 | } |
2145 | 2171 | ||
2146 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2172 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2147 | if (es->s_last_orphan) | 2173 | /* don't clear list on RO mount w/ errors */ |
2174 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { | ||
2148 | jbd_debug(1, "Errors on filesystem, " | 2175 | jbd_debug(1, "Errors on filesystem, " |
2149 | "clearing orphan list.\n"); | 2176 | "clearing orphan list.\n"); |
2150 | es->s_last_orphan = 0; | 2177 | es->s_last_orphan = 0; |
2178 | } | ||
2151 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); | 2179 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); |
2152 | return; | 2180 | return; |
2153 | } | 2181 | } |
@@ -2528,6 +2556,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | |||
2528 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2556 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2529 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2557 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2530 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2558 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); |
2559 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | ||
2531 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2560 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
2532 | 2561 | ||
2533 | static struct attribute *ext4_attrs[] = { | 2562 | static struct attribute *ext4_attrs[] = { |
@@ -2543,6 +2572,7 @@ static struct attribute *ext4_attrs[] = { | |||
2543 | ATTR_LIST(mb_stream_req), | 2572 | ATTR_LIST(mb_stream_req), |
2544 | ATTR_LIST(mb_group_prealloc), | 2573 | ATTR_LIST(mb_group_prealloc), |
2545 | ATTR_LIST(max_writeback_mb_bump), | 2574 | ATTR_LIST(max_writeback_mb_bump), |
2575 | ATTR_LIST(extent_max_zeroout_kb), | ||
2546 | ATTR_LIST(trigger_fs_error), | 2576 | ATTR_LIST(trigger_fs_error), |
2547 | NULL, | 2577 | NULL, |
2548 | }; | 2578 | }; |
@@ -2550,10 +2580,12 @@ static struct attribute *ext4_attrs[] = { | |||
2550 | /* Features this copy of ext4 supports */ | 2580 | /* Features this copy of ext4 supports */ |
2551 | EXT4_INFO_ATTR(lazy_itable_init); | 2581 | EXT4_INFO_ATTR(lazy_itable_init); |
2552 | EXT4_INFO_ATTR(batched_discard); | 2582 | EXT4_INFO_ATTR(batched_discard); |
2583 | EXT4_INFO_ATTR(meta_bg_resize); | ||
2553 | 2584 | ||
2554 | static struct attribute *ext4_feat_attrs[] = { | 2585 | static struct attribute *ext4_feat_attrs[] = { |
2555 | ATTR_LIST(lazy_itable_init), | 2586 | ATTR_LIST(lazy_itable_init), |
2556 | ATTR_LIST(batched_discard), | 2587 | ATTR_LIST(batched_discard), |
2588 | ATTR_LIST(meta_bg_resize), | ||
2557 | NULL, | 2589 | NULL, |
2558 | }; | 2590 | }; |
2559 | 2591 | ||
@@ -3374,7 +3406,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3374 | * enable delayed allocation by default | 3406 | * enable delayed allocation by default |
3375 | * Use -o nodelalloc to turn it off | 3407 | * Use -o nodelalloc to turn it off |
3376 | */ | 3408 | */ |
3377 | if (!IS_EXT3_SB(sb) && | 3409 | if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && |
3378 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) | 3410 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) |
3379 | set_opt(sb, DELALLOC); | 3411 | set_opt(sb, DELALLOC); |
3380 | 3412 | ||
@@ -3743,6 +3775,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3743 | 3775 | ||
3744 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3776 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3745 | sbi->s_max_writeback_mb_bump = 128; | 3777 | sbi->s_max_writeback_mb_bump = 128; |
3778 | sbi->s_extent_max_zeroout_kb = 32; | ||
3746 | 3779 | ||
3747 | /* | 3780 | /* |
3748 | * set up enough so that it can read an inode | 3781 | * set up enough so that it can read an inode |
@@ -4519,11 +4552,9 @@ static int ext4_unfreeze(struct super_block *sb) | |||
4519 | if (sb->s_flags & MS_RDONLY) | 4552 | if (sb->s_flags & MS_RDONLY) |
4520 | return 0; | 4553 | return 0; |
4521 | 4554 | ||
4522 | lock_super(sb); | ||
4523 | /* Reset the needs_recovery flag before the fs is unlocked. */ | 4555 | /* Reset the needs_recovery flag before the fs is unlocked. */ |
4524 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4556 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
4525 | ext4_commit_super(sb, 1); | 4557 | ext4_commit_super(sb, 1); |
4526 | unlock_super(sb); | ||
4527 | return 0; | 4558 | return 0; |
4528 | } | 4559 | } |
4529 | 4560 | ||
@@ -4559,7 +4590,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4559 | char *orig_data = kstrdup(data, GFP_KERNEL); | 4590 | char *orig_data = kstrdup(data, GFP_KERNEL); |
4560 | 4591 | ||
4561 | /* Store the original options */ | 4592 | /* Store the original options */ |
4562 | lock_super(sb); | ||
4563 | old_sb_flags = sb->s_flags; | 4593 | old_sb_flags = sb->s_flags; |
4564 | old_opts.s_mount_opt = sbi->s_mount_opt; | 4594 | old_opts.s_mount_opt = sbi->s_mount_opt; |
4565 | old_opts.s_mount_opt2 = sbi->s_mount_opt2; | 4595 | old_opts.s_mount_opt2 = sbi->s_mount_opt2; |
@@ -4701,7 +4731,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4701 | if (sbi->s_journal == NULL) | 4731 | if (sbi->s_journal == NULL) |
4702 | ext4_commit_super(sb, 1); | 4732 | ext4_commit_super(sb, 1); |
4703 | 4733 | ||
4704 | unlock_super(sb); | ||
4705 | #ifdef CONFIG_QUOTA | 4734 | #ifdef CONFIG_QUOTA |
4706 | /* Release old quota file names */ | 4735 | /* Release old quota file names */ |
4707 | for (i = 0; i < MAXQUOTAS; i++) | 4736 | for (i = 0; i < MAXQUOTAS; i++) |
@@ -4714,10 +4743,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4714 | else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 4743 | else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
4715 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { | 4744 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { |
4716 | err = ext4_enable_quotas(sb); | 4745 | err = ext4_enable_quotas(sb); |
4717 | if (err) { | 4746 | if (err) |
4718 | lock_super(sb); | ||
4719 | goto restore_opts; | 4747 | goto restore_opts; |
4720 | } | ||
4721 | } | 4748 | } |
4722 | } | 4749 | } |
4723 | #endif | 4750 | #endif |
@@ -4744,7 +4771,6 @@ restore_opts: | |||
4744 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 4771 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
4745 | } | 4772 | } |
4746 | #endif | 4773 | #endif |
4747 | unlock_super(sb); | ||
4748 | kfree(orig_data); | 4774 | kfree(orig_data); |
4749 | return err; | 4775 | return err; |
4750 | } | 4776 | } |
@@ -5269,8 +5295,10 @@ static int __init ext4_init_fs(void) | |||
5269 | if (err) | 5295 | if (err) |
5270 | goto out6; | 5296 | goto out6; |
5271 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 5297 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
5272 | if (!ext4_kset) | 5298 | if (!ext4_kset) { |
5299 | err = -ENOMEM; | ||
5273 | goto out5; | 5300 | goto out5; |
5301 | } | ||
5274 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 5302 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
5275 | 5303 | ||
5276 | err = ext4_init_feat_adverts(); | 5304 | err = ext4_init_feat_adverts(); |