diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Makefile | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 5 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 7 | ||||
-rw-r--r-- | fs/ext4/dir.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 112 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 65 | ||||
-rw-r--r-- | fs/ext4/extents.c | 368 | ||||
-rw-r--r-- | fs/ext4/file.c | 44 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 83 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 135 | ||||
-rw-r--r-- | fs/ext4/inode.c | 599 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 24 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 553 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 22 | ||||
-rw-r--r-- | fs/ext4/namei.c | 65 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 431 | ||||
-rw-r--r-- | fs/ext4/resize.c | 52 | ||||
-rw-r--r-- | fs/ext4/super.c | 620 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 4 | ||||
-rw-r--r-- | fs/ext4/xattr.h | 10 |
21 files changed, 2215 insertions, 990 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 8867b2a1e5fe..c947e36eda6c 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_EXT4_FS) += ext4.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o |
10 | 10 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index bd30799a43ed..14c3af26c671 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
171 | * less than the blocksize * 8 ( which is the size | 171 | * less than the blocksize * 8 ( which is the size |
172 | * of bitmap ), set rest of the block bitmap to 1 | 172 | * of bitmap ), set rest of the block bitmap to 1 |
173 | */ | 173 | */ |
174 | mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); | 174 | ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8, |
175 | bh->b_data); | ||
175 | } | 176 | } |
176 | return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); | 177 | return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); |
177 | } | 178 | } |
@@ -489,7 +490,7 @@ error_return: | |||
489 | * Check if filesystem has nblocks free & available for allocation. | 490 | * Check if filesystem has nblocks free & available for allocation. |
490 | * On success return 1, return 0 on failure. | 491 | * On success return 1, return 0 on failure. |
491 | */ | 492 | */ |
492 | int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) | 493 | static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) |
493 | { | 494 | { |
494 | s64 free_blocks, dirty_blocks, root_blocks; | 495 | s64 free_blocks, dirty_blocks, root_blocks; |
495 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | 496 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 3db5084db9bd..fac90f3fba80 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -29,16 +29,15 @@ struct ext4_system_zone { | |||
29 | 29 | ||
30 | static struct kmem_cache *ext4_system_zone_cachep; | 30 | static struct kmem_cache *ext4_system_zone_cachep; |
31 | 31 | ||
32 | int __init init_ext4_system_zone(void) | 32 | int __init ext4_init_system_zone(void) |
33 | { | 33 | { |
34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, | 34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0); |
35 | SLAB_RECLAIM_ACCOUNT); | ||
36 | if (ext4_system_zone_cachep == NULL) | 35 | if (ext4_system_zone_cachep == NULL) |
37 | return -ENOMEM; | 36 | return -ENOMEM; |
38 | return 0; | 37 | return 0; |
39 | } | 38 | } |
40 | 39 | ||
41 | void exit_ext4_system_zone(void) | 40 | void ext4_exit_system_zone(void) |
42 | { | 41 | { |
43 | kmem_cache_destroy(ext4_system_zone_cachep); | 42 | kmem_cache_destroy(ext4_system_zone_cachep); |
44 | } | 43 | } |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 374510f72baa..ece76fb6a40c 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode, | |||
39 | struct file *filp); | 39 | struct file *filp); |
40 | 40 | ||
41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
42 | .llseek = generic_file_llseek, | 42 | .llseek = ext4_llseek, |
43 | .read = generic_read_dir, | 43 | .read = generic_read_dir, |
44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ | 44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ |
45 | .unlocked_ioctl = ext4_ioctl, | 45 | .unlocked_ioctl = ext4_ioctl, |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 889ec9d5e6ad..6a5edea2d70b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -168,7 +168,20 @@ struct mpage_da_data { | |||
168 | int pages_written; | 168 | int pages_written; |
169 | int retval; | 169 | int retval; |
170 | }; | 170 | }; |
171 | #define EXT4_IO_UNWRITTEN 0x1 | 171 | |
172 | /* | ||
173 | * Flags for ext4_io_end->flags | ||
174 | */ | ||
175 | #define EXT4_IO_END_UNWRITTEN 0x0001 | ||
176 | #define EXT4_IO_END_ERROR 0x0002 | ||
177 | |||
178 | struct ext4_io_page { | ||
179 | struct page *p_page; | ||
180 | atomic_t p_count; | ||
181 | }; | ||
182 | |||
183 | #define MAX_IO_PAGES 128 | ||
184 | |||
172 | typedef struct ext4_io_end { | 185 | typedef struct ext4_io_end { |
173 | struct list_head list; /* per-file finished IO list */ | 186 | struct list_head list; /* per-file finished IO list */ |
174 | struct inode *inode; /* file being written to */ | 187 | struct inode *inode; /* file being written to */ |
@@ -179,8 +192,18 @@ typedef struct ext4_io_end { | |||
179 | struct work_struct work; /* data work queue */ | 192 | struct work_struct work; /* data work queue */ |
180 | struct kiocb *iocb; /* iocb struct for AIO */ | 193 | struct kiocb *iocb; /* iocb struct for AIO */ |
181 | int result; /* error value for AIO */ | 194 | int result; /* error value for AIO */ |
195 | int num_io_pages; | ||
196 | struct ext4_io_page *pages[MAX_IO_PAGES]; | ||
182 | } ext4_io_end_t; | 197 | } ext4_io_end_t; |
183 | 198 | ||
199 | struct ext4_io_submit { | ||
200 | int io_op; | ||
201 | struct bio *io_bio; | ||
202 | ext4_io_end_t *io_end; | ||
203 | struct ext4_io_page *io_page; | ||
204 | sector_t io_next_block; | ||
205 | }; | ||
206 | |||
184 | /* | 207 | /* |
185 | * Special inodes numbers | 208 | * Special inodes numbers |
186 | */ | 209 | */ |
@@ -205,6 +228,7 @@ typedef struct ext4_io_end { | |||
205 | #define EXT4_MIN_BLOCK_SIZE 1024 | 228 | #define EXT4_MIN_BLOCK_SIZE 1024 |
206 | #define EXT4_MAX_BLOCK_SIZE 65536 | 229 | #define EXT4_MAX_BLOCK_SIZE 65536 |
207 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 | 230 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 |
231 | #define EXT4_MAX_BLOCK_LOG_SIZE 16 | ||
208 | #ifdef __KERNEL__ | 232 | #ifdef __KERNEL__ |
209 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) | 233 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) |
210 | #else | 234 | #else |
@@ -834,6 +858,7 @@ struct ext4_inode_info { | |||
834 | spinlock_t i_completed_io_lock; | 858 | spinlock_t i_completed_io_lock; |
835 | /* current io_end structure for async DIO write*/ | 859 | /* current io_end structure for async DIO write*/ |
836 | ext4_io_end_t *cur_aio_dio; | 860 | ext4_io_end_t *cur_aio_dio; |
861 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | ||
837 | 862 | ||
838 | /* | 863 | /* |
839 | * Transactions that contain inode's metadata needed to complete | 864 | * Transactions that contain inode's metadata needed to complete |
@@ -889,6 +914,7 @@ struct ext4_inode_info { | |||
889 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 914 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
890 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 915 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
891 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ | 916 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ |
917 | #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ | ||
892 | 918 | ||
893 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 919 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
894 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt | 920 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt |
@@ -1087,7 +1113,6 @@ struct ext4_sb_info { | |||
1087 | struct completion s_kobj_unregister; | 1113 | struct completion s_kobj_unregister; |
1088 | 1114 | ||
1089 | /* Journaling */ | 1115 | /* Journaling */ |
1090 | struct inode *s_journal_inode; | ||
1091 | struct journal_s *s_journal; | 1116 | struct journal_s *s_journal; |
1092 | struct list_head s_orphan; | 1117 | struct list_head s_orphan; |
1093 | struct mutex s_orphan_lock; | 1118 | struct mutex s_orphan_lock; |
@@ -1120,10 +1145,7 @@ struct ext4_sb_info { | |||
1120 | /* for buddy allocator */ | 1145 | /* for buddy allocator */ |
1121 | struct ext4_group_info ***s_group_info; | 1146 | struct ext4_group_info ***s_group_info; |
1122 | struct inode *s_buddy_cache; | 1147 | struct inode *s_buddy_cache; |
1123 | long s_blocks_reserved; | ||
1124 | spinlock_t s_reserve_lock; | ||
1125 | spinlock_t s_md_lock; | 1148 | spinlock_t s_md_lock; |
1126 | tid_t s_last_transaction; | ||
1127 | unsigned short *s_mb_offsets; | 1149 | unsigned short *s_mb_offsets; |
1128 | unsigned int *s_mb_maxs; | 1150 | unsigned int *s_mb_maxs; |
1129 | 1151 | ||
@@ -1141,7 +1163,6 @@ struct ext4_sb_info { | |||
1141 | unsigned long s_mb_last_start; | 1163 | unsigned long s_mb_last_start; |
1142 | 1164 | ||
1143 | /* stats for buddy allocator */ | 1165 | /* stats for buddy allocator */ |
1144 | spinlock_t s_mb_pa_lock; | ||
1145 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | 1166 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ |
1146 | atomic_t s_bal_success; /* we found long enough chunks */ | 1167 | atomic_t s_bal_success; /* we found long enough chunks */ |
1147 | atomic_t s_bal_allocated; /* in blocks */ | 1168 | atomic_t s_bal_allocated; /* in blocks */ |
@@ -1172,6 +1193,11 @@ struct ext4_sb_info { | |||
1172 | 1193 | ||
1173 | /* timer for periodic error stats printing */ | 1194 | /* timer for periodic error stats printing */ |
1174 | struct timer_list s_err_report; | 1195 | struct timer_list s_err_report; |
1196 | |||
1197 | /* Lazy inode table initialization info */ | ||
1198 | struct ext4_li_request *s_li_request; | ||
1199 | /* Wait multiplier for lazy initialization thread */ | ||
1200 | unsigned int s_li_wait_mult; | ||
1175 | }; | 1201 | }; |
1176 | 1202 | ||
1177 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1203 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1533,7 +1559,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
1533 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 1559 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
1534 | ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); | 1560 | ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); |
1535 | 1561 | ||
1536 | extern struct proc_dir_entry *ext4_proc_root; | 1562 | /* |
1563 | * Timeout and state flag for lazy initialization inode thread. | ||
1564 | */ | ||
1565 | #define EXT4_DEF_LI_WAIT_MULT 10 | ||
1566 | #define EXT4_DEF_LI_MAX_START_DELAY 5 | ||
1567 | #define EXT4_LAZYINIT_QUIT 0x0001 | ||
1568 | #define EXT4_LAZYINIT_RUNNING 0x0002 | ||
1569 | |||
1570 | /* | ||
1571 | * Lazy inode table initialization info | ||
1572 | */ | ||
1573 | struct ext4_lazy_init { | ||
1574 | unsigned long li_state; | ||
1575 | |||
1576 | wait_queue_head_t li_wait_daemon; | ||
1577 | wait_queue_head_t li_wait_task; | ||
1578 | struct timer_list li_timer; | ||
1579 | struct task_struct *li_task; | ||
1580 | |||
1581 | struct list_head li_request_list; | ||
1582 | struct mutex li_list_mtx; | ||
1583 | }; | ||
1584 | |||
1585 | struct ext4_li_request { | ||
1586 | struct super_block *lr_super; | ||
1587 | struct ext4_sb_info *lr_sbi; | ||
1588 | ext4_group_t lr_next_group; | ||
1589 | struct list_head lr_request; | ||
1590 | unsigned long lr_next_sched; | ||
1591 | unsigned long lr_timeout; | ||
1592 | }; | ||
1593 | |||
1594 | struct ext4_features { | ||
1595 | struct kobject f_kobj; | ||
1596 | struct completion f_kobj_unregister; | ||
1597 | }; | ||
1537 | 1598 | ||
1538 | /* | 1599 | /* |
1539 | * Function prototypes | 1600 | * Function prototypes |
@@ -1561,7 +1622,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb, | |||
1561 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | 1622 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
1562 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 1623 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
1563 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | 1624 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
1564 | extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | ||
1565 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | 1625 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
1566 | ext4_fsblk_t block, unsigned long count); | 1626 | ext4_fsblk_t block, unsigned long count); |
1567 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); | 1627 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
@@ -1605,11 +1665,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); | |||
1605 | extern unsigned long ext4_count_free_inodes(struct super_block *); | 1665 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1606 | extern unsigned long ext4_count_dirs(struct super_block *); | 1666 | extern unsigned long ext4_count_dirs(struct super_block *); |
1607 | extern void ext4_check_inodes_bitmap(struct super_block *); | 1667 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1608 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | 1668 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
1609 | struct buffer_head *bh, | 1669 | extern int ext4_init_inode_table(struct super_block *sb, |
1610 | ext4_group_t group, | 1670 | ext4_group_t group, int barrier); |
1611 | struct ext4_group_desc *desc); | ||
1612 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | ||
1613 | 1671 | ||
1614 | /* mballoc.c */ | 1672 | /* mballoc.c */ |
1615 | extern long ext4_mb_stats; | 1673 | extern long ext4_mb_stats; |
@@ -1620,16 +1678,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | |||
1620 | struct ext4_allocation_request *, int *); | 1678 | struct ext4_allocation_request *, int *); |
1621 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1679 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
1622 | extern void ext4_discard_preallocations(struct inode *); | 1680 | extern void ext4_discard_preallocations(struct inode *); |
1623 | extern int __init init_ext4_mballoc(void); | 1681 | extern int __init ext4_init_mballoc(void); |
1624 | extern void exit_ext4_mballoc(void); | 1682 | extern void ext4_exit_mballoc(void); |
1625 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | 1683 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
1626 | struct buffer_head *bh, ext4_fsblk_t block, | 1684 | struct buffer_head *bh, ext4_fsblk_t block, |
1627 | unsigned long count, int flags); | 1685 | unsigned long count, int flags); |
1628 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1686 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1629 | ext4_group_t i, struct ext4_group_desc *desc); | 1687 | ext4_group_t i, struct ext4_group_desc *desc); |
1630 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1688 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
1631 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1689 | |
1632 | ext4_group_t, int); | ||
1633 | /* inode.c */ | 1690 | /* inode.c */ |
1634 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 1691 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, |
1635 | ext4_lblk_t, int, int *); | 1692 | ext4_lblk_t, int, int *); |
@@ -1657,13 +1714,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *); | |||
1657 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1714 | extern int ext4_alloc_da_blocks(struct inode *inode); |
1658 | extern void ext4_set_aops(struct inode *inode); | 1715 | extern void ext4_set_aops(struct inode *inode); |
1659 | extern int ext4_writepage_trans_blocks(struct inode *); | 1716 | extern int ext4_writepage_trans_blocks(struct inode *); |
1660 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1661 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 1717 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
1662 | extern int ext4_block_truncate_page(handle_t *handle, | 1718 | extern int ext4_block_truncate_page(handle_t *handle, |
1663 | struct address_space *mapping, loff_t from); | 1719 | struct address_space *mapping, loff_t from); |
1664 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1720 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1665 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1721 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1666 | extern int flush_completed_IO(struct inode *inode); | ||
1667 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1722 | extern void ext4_da_update_reserve_space(struct inode *inode, |
1668 | int used, int quota_claim); | 1723 | int used, int quota_claim); |
1669 | /* ioctl.c */ | 1724 | /* ioctl.c */ |
@@ -1960,6 +2015,7 @@ extern const struct file_operations ext4_dir_operations; | |||
1960 | /* file.c */ | 2015 | /* file.c */ |
1961 | extern const struct inode_operations ext4_file_inode_operations; | 2016 | extern const struct inode_operations ext4_file_inode_operations; |
1962 | extern const struct file_operations ext4_file_operations; | 2017 | extern const struct file_operations ext4_file_operations; |
2018 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | ||
1963 | 2019 | ||
1964 | /* namei.c */ | 2020 | /* namei.c */ |
1965 | extern const struct inode_operations ext4_dir_inode_operations; | 2021 | extern const struct inode_operations ext4_dir_inode_operations; |
@@ -1973,8 +2029,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1973 | /* block_validity */ | 2029 | /* block_validity */ |
1974 | extern void ext4_release_system_zone(struct super_block *sb); | 2030 | extern void ext4_release_system_zone(struct super_block *sb); |
1975 | extern int ext4_setup_system_zone(struct super_block *sb); | 2031 | extern int ext4_setup_system_zone(struct super_block *sb); |
1976 | extern int __init init_ext4_system_zone(void); | 2032 | extern int __init ext4_init_system_zone(void); |
1977 | extern void exit_ext4_system_zone(void); | 2033 | extern void ext4_exit_system_zone(void); |
1978 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | 2034 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, |
1979 | ext4_fsblk_t start_blk, | 2035 | ext4_fsblk_t start_blk, |
1980 | unsigned int count); | 2036 | unsigned int count); |
@@ -2002,6 +2058,18 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2002 | __u64 start_orig, __u64 start_donor, | 2058 | __u64 start_orig, __u64 start_donor, |
2003 | __u64 len, __u64 *moved_len); | 2059 | __u64 len, __u64 *moved_len); |
2004 | 2060 | ||
2061 | /* page-io.c */ | ||
2062 | extern int __init ext4_init_pageio(void); | ||
2063 | extern void ext4_exit_pageio(void); | ||
2064 | extern void ext4_ioend_wait(struct inode *); | ||
2065 | extern void ext4_free_io_end(ext4_io_end_t *io); | ||
2066 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | ||
2067 | extern int ext4_end_io_nolock(ext4_io_end_t *io); | ||
2068 | extern void ext4_io_submit(struct ext4_io_submit *io); | ||
2069 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | ||
2070 | struct page *page, | ||
2071 | int len, | ||
2072 | struct writeback_control *wbc); | ||
2005 | 2073 | ||
2006 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2074 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ |
2007 | enum ext4_state_bits { | 2075 | enum ext4_state_bits { |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index bdb6ce7e2eb4..28ce70fd9cd0 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) | |||
225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); | 225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); |
226 | } | 226 | } |
227 | 227 | ||
228 | /* | ||
229 | * ext4_ext_pblock: | ||
230 | * combine low and high parts of physical block number into ext4_fsblk_t | ||
231 | */ | ||
232 | static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex) | ||
233 | { | ||
234 | ext4_fsblk_t block; | ||
235 | |||
236 | block = le32_to_cpu(ex->ee_start_lo); | ||
237 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
238 | return block; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * ext4_idx_pblock: | ||
243 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
244 | */ | ||
245 | static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix) | ||
246 | { | ||
247 | ext4_fsblk_t block; | ||
248 | |||
249 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
250 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
251 | return block; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * ext4_ext_store_pblock: | ||
256 | * stores a large physical block number into an extent struct, | ||
257 | * breaking it into parts | ||
258 | */ | ||
259 | static inline void ext4_ext_store_pblock(struct ext4_extent *ex, | ||
260 | ext4_fsblk_t pb) | ||
261 | { | ||
262 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
263 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
264 | 0xffff); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * ext4_idx_store_pblock: | ||
269 | * stores a large physical block number into an index struct, | ||
270 | * breaking it into parts | ||
271 | */ | ||
272 | static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, | ||
273 | ext4_fsblk_t pb) | ||
274 | { | ||
275 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
276 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
277 | 0xffff); | ||
278 | } | ||
279 | |||
228 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 280 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
229 | sector_t lblocks); | 281 | sector_t lblocks); |
230 | extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); | ||
231 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | ||
232 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | ||
233 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 282 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
234 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | 283 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
235 | int num, | 284 | int num, |
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | |||
237 | extern int ext4_can_extents_be_merged(struct inode *inode, | 286 | extern int ext4_can_extents_be_merged(struct inode *inode, |
238 | struct ext4_extent *ex1, | 287 | struct ext4_extent *ex1, |
239 | struct ext4_extent *ex2); | 288 | struct ext4_extent *ex2); |
240 | extern int ext4_ext_try_to_merge(struct inode *inode, | ||
241 | struct ext4_ext_path *path, | ||
242 | struct ext4_extent *); | ||
243 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | ||
244 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); | 289 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); |
245 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
246 | ext_prepare_callback, void *); | ||
247 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 290 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
248 | struct ext4_ext_path *); | 291 | struct ext4_ext_path *); |
249 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | ||
250 | ext4_lblk_t *, ext4_fsblk_t *); | ||
251 | extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, | ||
252 | ext4_lblk_t *, ext4_fsblk_t *); | ||
253 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 292 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
254 | extern int ext4_ext_check_inode(struct inode *inode); | 293 | extern int ext4_ext_check_inode(struct inode *inode); |
255 | #endif /* _EXT4_EXTENTS */ | 294 | #endif /* _EXT4_EXTENTS */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 06328d3e5717..0554c48cb1fd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -44,55 +44,6 @@ | |||
44 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
45 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
46 | 46 | ||
47 | |||
48 | /* | ||
49 | * ext_pblock: | ||
50 | * combine low and high parts of physical block number into ext4_fsblk_t | ||
51 | */ | ||
52 | ext4_fsblk_t ext_pblock(struct ext4_extent *ex) | ||
53 | { | ||
54 | ext4_fsblk_t block; | ||
55 | |||
56 | block = le32_to_cpu(ex->ee_start_lo); | ||
57 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
58 | return block; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * idx_pblock: | ||
63 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
64 | */ | ||
65 | ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) | ||
66 | { | ||
67 | ext4_fsblk_t block; | ||
68 | |||
69 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
70 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
71 | return block; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * ext4_ext_store_pblock: | ||
76 | * stores a large physical block number into an extent struct, | ||
77 | * breaking it into parts | ||
78 | */ | ||
79 | void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) | ||
80 | { | ||
81 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
82 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * ext4_idx_store_pblock: | ||
87 | * stores a large physical block number into an index struct, | ||
88 | * breaking it into parts | ||
89 | */ | ||
90 | static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | ||
91 | { | ||
92 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
94 | } | ||
95 | |||
96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 47 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
97 | struct inode *inode, | 48 | struct inode *inode, |
98 | int needed) | 49 | int needed) |
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
169 | /* try to predict block placement */ | 120 | /* try to predict block placement */ |
170 | ex = path[depth].p_ext; | 121 | ex = path[depth].p_ext; |
171 | if (ex) | 122 | if (ex) |
172 | return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); | 123 | return (ext4_ext_pblock(ex) + |
124 | (block - le32_to_cpu(ex->ee_block))); | ||
173 | 125 | ||
174 | /* it looks like index is empty; | 126 | /* it looks like index is empty; |
175 | * try to find starting block from index itself */ | 127 | * try to find starting block from index itself */ |
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
354 | 306 | ||
355 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 307 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
356 | { | 308 | { |
357 | ext4_fsblk_t block = ext_pblock(ext); | 309 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
358 | int len = ext4_ext_get_actual_len(ext); | 310 | int len = ext4_ext_get_actual_len(ext); |
359 | 311 | ||
360 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 312 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
363 | static int ext4_valid_extent_idx(struct inode *inode, | 315 | static int ext4_valid_extent_idx(struct inode *inode, |
364 | struct ext4_extent_idx *ext_idx) | 316 | struct ext4_extent_idx *ext_idx) |
365 | { | 317 | { |
366 | ext4_fsblk_t block = idx_pblock(ext_idx); | 318 | ext4_fsblk_t block = ext4_idx_pblock(ext_idx); |
367 | 319 | ||
368 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); | 320 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); |
369 | } | 321 | } |
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
463 | for (k = 0; k <= l; k++, path++) { | 415 | for (k = 0; k <= l; k++, path++) { |
464 | if (path->p_idx) { | 416 | if (path->p_idx) { |
465 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 417 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
466 | idx_pblock(path->p_idx)); | 418 | ext4_idx_pblock(path->p_idx)); |
467 | } else if (path->p_ext) { | 419 | } else if (path->p_ext) { |
468 | ext_debug(" %d:[%d]%d:%llu ", | 420 | ext_debug(" %d:[%d]%d:%llu ", |
469 | le32_to_cpu(path->p_ext->ee_block), | 421 | le32_to_cpu(path->p_ext->ee_block), |
470 | ext4_ext_is_uninitialized(path->p_ext), | 422 | ext4_ext_is_uninitialized(path->p_ext), |
471 | ext4_ext_get_actual_len(path->p_ext), | 423 | ext4_ext_get_actual_len(path->p_ext), |
472 | ext_pblock(path->p_ext)); | 424 | ext4_ext_pblock(path->p_ext)); |
473 | } else | 425 | } else |
474 | ext_debug(" []"); | 426 | ext_debug(" []"); |
475 | } | 427 | } |
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
494 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 446 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
495 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), | 447 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
496 | ext4_ext_is_uninitialized(ex), | 448 | ext4_ext_is_uninitialized(ex), |
497 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 449 | ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); |
498 | } | 450 | } |
499 | ext_debug("\n"); | 451 | ext_debug("\n"); |
500 | } | 452 | } |
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
545 | 497 | ||
546 | path->p_idx = l - 1; | 498 | path->p_idx = l - 1; |
547 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), | 499 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), |
548 | idx_pblock(path->p_idx)); | 500 | ext4_idx_pblock(path->p_idx)); |
549 | 501 | ||
550 | #ifdef CHECK_BINSEARCH | 502 | #ifdef CHECK_BINSEARCH |
551 | { | 503 | { |
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode, | |||
614 | path->p_ext = l - 1; | 566 | path->p_ext = l - 1; |
615 | ext_debug(" -> %d:%llu:[%d]%d ", | 567 | ext_debug(" -> %d:%llu:[%d]%d ", |
616 | le32_to_cpu(path->p_ext->ee_block), | 568 | le32_to_cpu(path->p_ext->ee_block), |
617 | ext_pblock(path->p_ext), | 569 | ext4_ext_pblock(path->p_ext), |
618 | ext4_ext_is_uninitialized(path->p_ext), | 570 | ext4_ext_is_uninitialized(path->p_ext), |
619 | ext4_ext_get_actual_len(path->p_ext)); | 571 | ext4_ext_get_actual_len(path->p_ext)); |
620 | 572 | ||
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
682 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); | 634 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); |
683 | 635 | ||
684 | ext4_ext_binsearch_idx(inode, path + ppos, block); | 636 | ext4_ext_binsearch_idx(inode, path + ppos, block); |
685 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 637 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
686 | path[ppos].p_depth = i; | 638 | path[ppos].p_depth = i; |
687 | path[ppos].p_ext = NULL; | 639 | path[ppos].p_ext = NULL; |
688 | 640 | ||
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
721 | ext4_ext_binsearch(inode, path + ppos, block); | 673 | ext4_ext_binsearch(inode, path + ppos, block); |
722 | /* if not an empty leaf */ | 674 | /* if not an empty leaf */ |
723 | if (path[ppos].p_ext) | 675 | if (path[ppos].p_ext) |
724 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 676 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
725 | 677 | ||
726 | ext4_ext_show_path(inode, path); | 678 | ext4_ext_show_path(inode, path); |
727 | 679 | ||
@@ -739,9 +691,9 @@ err: | |||
739 | * insert new index [@logical;@ptr] into the block at @curp; | 691 | * insert new index [@logical;@ptr] into the block at @curp; |
740 | * check where to insert: before @curp or after @curp | 692 | * check where to insert: before @curp or after @curp |
741 | */ | 693 | */ |
742 | int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | 694 | static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, |
743 | struct ext4_ext_path *curp, | 695 | struct ext4_ext_path *curp, |
744 | int logical, ext4_fsblk_t ptr) | 696 | int logical, ext4_fsblk_t ptr) |
745 | { | 697 | { |
746 | struct ext4_extent_idx *ix; | 698 | struct ext4_extent_idx *ix; |
747 | int len, err; | 699 | int len, err; |
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
917 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | 869 | EXT_MAX_EXTENT(path[depth].p_hdr)) { |
918 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", | 870 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", |
919 | le32_to_cpu(path[depth].p_ext->ee_block), | 871 | le32_to_cpu(path[depth].p_ext->ee_block), |
920 | ext_pblock(path[depth].p_ext), | 872 | ext4_ext_pblock(path[depth].p_ext), |
921 | ext4_ext_is_uninitialized(path[depth].p_ext), | 873 | ext4_ext_is_uninitialized(path[depth].p_ext), |
922 | ext4_ext_get_actual_len(path[depth].p_ext), | 874 | ext4_ext_get_actual_len(path[depth].p_ext), |
923 | newblock); | 875 | newblock); |
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
1007 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { | 959 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { |
1008 | ext_debug("%d: move %d:%llu in new index %llu\n", i, | 960 | ext_debug("%d: move %d:%llu in new index %llu\n", i, |
1009 | le32_to_cpu(path[i].p_idx->ei_block), | 961 | le32_to_cpu(path[i].p_idx->ei_block), |
1010 | idx_pblock(path[i].p_idx), | 962 | ext4_idx_pblock(path[i].p_idx), |
1011 | newblock); | 963 | newblock); |
1012 | /*memmove(++fidx, path[i].p_idx++, | 964 | /*memmove(++fidx, path[i].p_idx++, |
1013 | sizeof(struct ext4_extent_idx)); | 965 | sizeof(struct ext4_extent_idx)); |
@@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1146 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", | 1098 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", |
1147 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), | 1099 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), |
1148 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1100 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1149 | idx_pblock(EXT_FIRST_INDEX(neh))); | 1101 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
1150 | 1102 | ||
1151 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); | 1103 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); |
1152 | err = ext4_ext_dirty(handle, inode, curp); | 1104 | err = ext4_ext_dirty(handle, inode, curp); |
@@ -1232,9 +1184,9 @@ out: | |||
1232 | * returns 0 at @phys | 1184 | * returns 0 at @phys |
1233 | * return value contains 0 (success) or error code | 1185 | * return value contains 0 (success) or error code |
1234 | */ | 1186 | */ |
1235 | int | 1187 | static int ext4_ext_search_left(struct inode *inode, |
1236 | ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | 1188 | struct ext4_ext_path *path, |
1237 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1189 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1238 | { | 1190 | { |
1239 | struct ext4_extent_idx *ix; | 1191 | struct ext4_extent_idx *ix; |
1240 | struct ext4_extent *ex; | 1192 | struct ext4_extent *ex; |
@@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1286 | } | 1238 | } |
1287 | 1239 | ||
1288 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; | 1240 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; |
1289 | *phys = ext_pblock(ex) + ee_len - 1; | 1241 | *phys = ext4_ext_pblock(ex) + ee_len - 1; |
1290 | return 0; | 1242 | return 0; |
1291 | } | 1243 | } |
1292 | 1244 | ||
@@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1297 | * returns 0 at @phys | 1249 | * returns 0 at @phys |
1298 | * return value contains 0 (success) or error code | 1250 | * return value contains 0 (success) or error code |
1299 | */ | 1251 | */ |
1300 | int | 1252 | static int ext4_ext_search_right(struct inode *inode, |
1301 | ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | 1253 | struct ext4_ext_path *path, |
1302 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1254 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1303 | { | 1255 | { |
1304 | struct buffer_head *bh = NULL; | 1256 | struct buffer_head *bh = NULL; |
1305 | struct ext4_extent_header *eh; | 1257 | struct ext4_extent_header *eh; |
@@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1342 | } | 1294 | } |
1343 | } | 1295 | } |
1344 | *logical = le32_to_cpu(ex->ee_block); | 1296 | *logical = le32_to_cpu(ex->ee_block); |
1345 | *phys = ext_pblock(ex); | 1297 | *phys = ext4_ext_pblock(ex); |
1346 | return 0; | 1298 | return 0; |
1347 | } | 1299 | } |
1348 | 1300 | ||
@@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1357 | /* next allocated block in this leaf */ | 1309 | /* next allocated block in this leaf */ |
1358 | ex++; | 1310 | ex++; |
1359 | *logical = le32_to_cpu(ex->ee_block); | 1311 | *logical = le32_to_cpu(ex->ee_block); |
1360 | *phys = ext_pblock(ex); | 1312 | *phys = ext4_ext_pblock(ex); |
1361 | return 0; | 1313 | return 0; |
1362 | } | 1314 | } |
1363 | 1315 | ||
@@ -1376,7 +1328,7 @@ got_index: | |||
1376 | * follow it and find the closest allocated | 1328 | * follow it and find the closest allocated |
1377 | * block to the right */ | 1329 | * block to the right */ |
1378 | ix++; | 1330 | ix++; |
1379 | block = idx_pblock(ix); | 1331 | block = ext4_idx_pblock(ix); |
1380 | while (++depth < path->p_depth) { | 1332 | while (++depth < path->p_depth) { |
1381 | bh = sb_bread(inode->i_sb, block); | 1333 | bh = sb_bread(inode->i_sb, block); |
1382 | if (bh == NULL) | 1334 | if (bh == NULL) |
@@ -1388,7 +1340,7 @@ got_index: | |||
1388 | return -EIO; | 1340 | return -EIO; |
1389 | } | 1341 | } |
1390 | ix = EXT_FIRST_INDEX(eh); | 1342 | ix = EXT_FIRST_INDEX(eh); |
1391 | block = idx_pblock(ix); | 1343 | block = ext4_idx_pblock(ix); |
1392 | put_bh(bh); | 1344 | put_bh(bh); |
1393 | } | 1345 | } |
1394 | 1346 | ||
@@ -1402,7 +1354,7 @@ got_index: | |||
1402 | } | 1354 | } |
1403 | ex = EXT_FIRST_EXTENT(eh); | 1355 | ex = EXT_FIRST_EXTENT(eh); |
1404 | *logical = le32_to_cpu(ex->ee_block); | 1356 | *logical = le32_to_cpu(ex->ee_block); |
1405 | *phys = ext_pblock(ex); | 1357 | *phys = ext4_ext_pblock(ex); |
1406 | put_bh(bh); | 1358 | put_bh(bh); |
1407 | return 0; | 1359 | return 0; |
1408 | } | 1360 | } |
@@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1573 | return 0; | 1525 | return 0; |
1574 | #endif | 1526 | #endif |
1575 | 1527 | ||
1576 | if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) | 1528 | if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) |
1577 | return 1; | 1529 | return 1; |
1578 | return 0; | 1530 | return 0; |
1579 | } | 1531 | } |
@@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1585 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns | 1537 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns |
1586 | * 1 if they got merged. | 1538 | * 1 if they got merged. |
1587 | */ | 1539 | */ |
1588 | int ext4_ext_try_to_merge(struct inode *inode, | 1540 | static int ext4_ext_try_to_merge(struct inode *inode, |
1589 | struct ext4_ext_path *path, | 1541 | struct ext4_ext_path *path, |
1590 | struct ext4_extent *ex) | 1542 | struct ext4_extent *ex) |
1591 | { | 1543 | { |
1592 | struct ext4_extent_header *eh; | 1544 | struct ext4_extent_header *eh; |
1593 | unsigned int depth, len; | 1545 | unsigned int depth, len; |
@@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1632 | * such that there will be no overlap, and then returns 1. | 1584 | * such that there will be no overlap, and then returns 1. |
1633 | * If there is no overlap found, it returns 0. | 1585 | * If there is no overlap found, it returns 0. |
1634 | */ | 1586 | */ |
1635 | unsigned int ext4_ext_check_overlap(struct inode *inode, | 1587 | static unsigned int ext4_ext_check_overlap(struct inode *inode, |
1636 | struct ext4_extent *newext, | 1588 | struct ext4_extent *newext, |
1637 | struct ext4_ext_path *path) | 1589 | struct ext4_ext_path *path) |
1638 | { | 1590 | { |
1639 | ext4_lblk_t b1, b2; | 1591 | ext4_lblk_t b1, b2; |
1640 | unsigned int depth, len1; | 1592 | unsigned int depth, len1; |
@@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1706 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) | 1658 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) |
1707 | && ext4_can_extents_be_merged(inode, ex, newext)) { | 1659 | && ext4_can_extents_be_merged(inode, ex, newext)) { |
1708 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", | 1660 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1709 | ext4_ext_is_uninitialized(newext), | 1661 | ext4_ext_is_uninitialized(newext), |
1710 | ext4_ext_get_actual_len(newext), | 1662 | ext4_ext_get_actual_len(newext), |
1711 | le32_to_cpu(ex->ee_block), | 1663 | le32_to_cpu(ex->ee_block), |
1712 | ext4_ext_is_uninitialized(ex), | 1664 | ext4_ext_is_uninitialized(ex), |
1713 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1665 | ext4_ext_get_actual_len(ex), |
1666 | ext4_ext_pblock(ex)); | ||
1714 | err = ext4_ext_get_access(handle, inode, path + depth); | 1667 | err = ext4_ext_get_access(handle, inode, path + depth); |
1715 | if (err) | 1668 | if (err) |
1716 | return err; | 1669 | return err; |
@@ -1780,7 +1733,7 @@ has_space: | |||
1780 | /* there is no extent in this leaf, create first one */ | 1733 | /* there is no extent in this leaf, create first one */ |
1781 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", | 1734 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
1782 | le32_to_cpu(newext->ee_block), | 1735 | le32_to_cpu(newext->ee_block), |
1783 | ext_pblock(newext), | 1736 | ext4_ext_pblock(newext), |
1784 | ext4_ext_is_uninitialized(newext), | 1737 | ext4_ext_is_uninitialized(newext), |
1785 | ext4_ext_get_actual_len(newext)); | 1738 | ext4_ext_get_actual_len(newext)); |
1786 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1739 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
@@ -1794,7 +1747,7 @@ has_space: | |||
1794 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " | 1747 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
1795 | "move %d from 0x%p to 0x%p\n", | 1748 | "move %d from 0x%p to 0x%p\n", |
1796 | le32_to_cpu(newext->ee_block), | 1749 | le32_to_cpu(newext->ee_block), |
1797 | ext_pblock(newext), | 1750 | ext4_ext_pblock(newext), |
1798 | ext4_ext_is_uninitialized(newext), | 1751 | ext4_ext_is_uninitialized(newext), |
1799 | ext4_ext_get_actual_len(newext), | 1752 | ext4_ext_get_actual_len(newext), |
1800 | nearex, len, nearex + 1, nearex + 2); | 1753 | nearex, len, nearex + 1, nearex + 2); |
@@ -1808,7 +1761,7 @@ has_space: | |||
1808 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " | 1761 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
1809 | "move %d from 0x%p to 0x%p\n", | 1762 | "move %d from 0x%p to 0x%p\n", |
1810 | le32_to_cpu(newext->ee_block), | 1763 | le32_to_cpu(newext->ee_block), |
1811 | ext_pblock(newext), | 1764 | ext4_ext_pblock(newext), |
1812 | ext4_ext_is_uninitialized(newext), | 1765 | ext4_ext_is_uninitialized(newext), |
1813 | ext4_ext_get_actual_len(newext), | 1766 | ext4_ext_get_actual_len(newext), |
1814 | nearex, len, nearex + 1, nearex + 2); | 1767 | nearex, len, nearex + 1, nearex + 2); |
@@ -1819,7 +1772,7 @@ has_space: | |||
1819 | le16_add_cpu(&eh->eh_entries, 1); | 1772 | le16_add_cpu(&eh->eh_entries, 1); |
1820 | nearex = path[depth].p_ext; | 1773 | nearex = path[depth].p_ext; |
1821 | nearex->ee_block = newext->ee_block; | 1774 | nearex->ee_block = newext->ee_block; |
1822 | ext4_ext_store_pblock(nearex, ext_pblock(newext)); | 1775 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); |
1823 | nearex->ee_len = newext->ee_len; | 1776 | nearex->ee_len = newext->ee_len; |
1824 | 1777 | ||
1825 | merge: | 1778 | merge: |
@@ -1845,9 +1798,9 @@ cleanup: | |||
1845 | return err; | 1798 | return err; |
1846 | } | 1799 | } |
1847 | 1800 | ||
1848 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | 1801 | static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, |
1849 | ext4_lblk_t num, ext_prepare_callback func, | 1802 | ext4_lblk_t num, ext_prepare_callback func, |
1850 | void *cbdata) | 1803 | void *cbdata) |
1851 | { | 1804 | { |
1852 | struct ext4_ext_path *path = NULL; | 1805 | struct ext4_ext_path *path = NULL; |
1853 | struct ext4_ext_cache cbex; | 1806 | struct ext4_ext_cache cbex; |
@@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1923 | } else { | 1876 | } else { |
1924 | cbex.ec_block = le32_to_cpu(ex->ee_block); | 1877 | cbex.ec_block = le32_to_cpu(ex->ee_block); |
1925 | cbex.ec_len = ext4_ext_get_actual_len(ex); | 1878 | cbex.ec_len = ext4_ext_get_actual_len(ex); |
1926 | cbex.ec_start = ext_pblock(ex); | 1879 | cbex.ec_start = ext4_ext_pblock(ex); |
1927 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | 1880 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; |
1928 | } | 1881 | } |
1929 | 1882 | ||
@@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2073 | 2026 | ||
2074 | /* free index block */ | 2027 | /* free index block */ |
2075 | path--; | 2028 | path--; |
2076 | leaf = idx_pblock(path->p_idx); | 2029 | leaf = ext4_idx_pblock(path->p_idx); |
2077 | if (unlikely(path->p_hdr->eh_entries == 0)) { | 2030 | if (unlikely(path->p_hdr->eh_entries == 0)) { |
2078 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); | 2031 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); |
2079 | return -EIO; | 2032 | return -EIO; |
@@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2181 | ext4_fsblk_t start; | 2134 | ext4_fsblk_t start; |
2182 | 2135 | ||
2183 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2136 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2184 | start = ext_pblock(ex) + ee_len - num; | 2137 | start = ext4_ext_pblock(ex) + ee_len - num; |
2185 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2138 | ext_debug("free last %u blocks starting %llu\n", num, start); |
2186 | ext4_free_blocks(handle, inode, 0, start, num, flags); | 2139 | ext4_free_blocks(handle, inode, 0, start, num, flags); |
2187 | } else if (from == le32_to_cpu(ex->ee_block) | 2140 | } else if (from == le32_to_cpu(ex->ee_block) |
@@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2310 | goto out; | 2263 | goto out; |
2311 | 2264 | ||
2312 | ext_debug("new extent: %u:%u:%llu\n", block, num, | 2265 | ext_debug("new extent: %u:%u:%llu\n", block, num, |
2313 | ext_pblock(ex)); | 2266 | ext4_ext_pblock(ex)); |
2314 | ex--; | 2267 | ex--; |
2315 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2268 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2316 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2269 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2421,9 +2374,9 @@ again: | |||
2421 | struct buffer_head *bh; | 2374 | struct buffer_head *bh; |
2422 | /* go to the next level */ | 2375 | /* go to the next level */ |
2423 | ext_debug("move to level %d (block %llu)\n", | 2376 | ext_debug("move to level %d (block %llu)\n", |
2424 | i + 1, idx_pblock(path[i].p_idx)); | 2377 | i + 1, ext4_idx_pblock(path[i].p_idx)); |
2425 | memset(path + i + 1, 0, sizeof(*path)); | 2378 | memset(path + i + 1, 0, sizeof(*path)); |
2426 | bh = sb_bread(sb, idx_pblock(path[i].p_idx)); | 2379 | bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); |
2427 | if (!bh) { | 2380 | if (!bh) { |
2428 | /* should we reset i_size? */ | 2381 | /* should we reset i_size? */ |
2429 | err = -EIO; | 2382 | err = -EIO; |
@@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb) | |||
2535 | #endif | 2488 | #endif |
2536 | } | 2489 | } |
2537 | 2490 | ||
2538 | static void bi_complete(struct bio *bio, int error) | ||
2539 | { | ||
2540 | complete((struct completion *)bio->bi_private); | ||
2541 | } | ||
2542 | |||
2543 | /* FIXME!! we need to try to merge to left or right after zero-out */ | 2491 | /* FIXME!! we need to try to merge to left or right after zero-out */ |
2544 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | 2492 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) |
2545 | { | 2493 | { |
2494 | ext4_fsblk_t ee_pblock; | ||
2495 | unsigned int ee_len; | ||
2546 | int ret; | 2496 | int ret; |
2547 | struct bio *bio; | ||
2548 | int blkbits, blocksize; | ||
2549 | sector_t ee_pblock; | ||
2550 | struct completion event; | ||
2551 | unsigned int ee_len, len, done, offset; | ||
2552 | 2497 | ||
2553 | |||
2554 | blkbits = inode->i_blkbits; | ||
2555 | blocksize = inode->i_sb->s_blocksize; | ||
2556 | ee_len = ext4_ext_get_actual_len(ex); | 2498 | ee_len = ext4_ext_get_actual_len(ex); |
2557 | ee_pblock = ext_pblock(ex); | 2499 | ee_pblock = ext4_ext_pblock(ex); |
2558 | |||
2559 | /* convert ee_pblock to 512 byte sectors */ | ||
2560 | ee_pblock = ee_pblock << (blkbits - 9); | ||
2561 | |||
2562 | while (ee_len > 0) { | ||
2563 | |||
2564 | if (ee_len > BIO_MAX_PAGES) | ||
2565 | len = BIO_MAX_PAGES; | ||
2566 | else | ||
2567 | len = ee_len; | ||
2568 | |||
2569 | bio = bio_alloc(GFP_NOIO, len); | ||
2570 | if (!bio) | ||
2571 | return -ENOMEM; | ||
2572 | |||
2573 | bio->bi_sector = ee_pblock; | ||
2574 | bio->bi_bdev = inode->i_sb->s_bdev; | ||
2575 | |||
2576 | done = 0; | ||
2577 | offset = 0; | ||
2578 | while (done < len) { | ||
2579 | ret = bio_add_page(bio, ZERO_PAGE(0), | ||
2580 | blocksize, offset); | ||
2581 | if (ret != blocksize) { | ||
2582 | /* | ||
2583 | * We can't add any more pages because of | ||
2584 | * hardware limitations. Start a new bio. | ||
2585 | */ | ||
2586 | break; | ||
2587 | } | ||
2588 | done++; | ||
2589 | offset += blocksize; | ||
2590 | if (offset >= PAGE_CACHE_SIZE) | ||
2591 | offset = 0; | ||
2592 | } | ||
2593 | 2500 | ||
2594 | init_completion(&event); | 2501 | ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); |
2595 | bio->bi_private = &event; | 2502 | if (ret > 0) |
2596 | bio->bi_end_io = bi_complete; | 2503 | ret = 0; |
2597 | submit_bio(WRITE, bio); | ||
2598 | wait_for_completion(&event); | ||
2599 | 2504 | ||
2600 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 2505 | return ret; |
2601 | bio_put(bio); | ||
2602 | return -EIO; | ||
2603 | } | ||
2604 | bio_put(bio); | ||
2605 | ee_len -= done; | ||
2606 | ee_pblock += done << (blkbits - 9); | ||
2607 | } | ||
2608 | return 0; | ||
2609 | } | 2506 | } |
2610 | 2507 | ||
2611 | #define EXT4_EXT_ZERO_LEN 7 | 2508 | #define EXT4_EXT_ZERO_LEN 7 |
@@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2651 | ee_block = le32_to_cpu(ex->ee_block); | 2548 | ee_block = le32_to_cpu(ex->ee_block); |
2652 | ee_len = ext4_ext_get_actual_len(ex); | 2549 | ee_len = ext4_ext_get_actual_len(ex); |
2653 | allocated = ee_len - (map->m_lblk - ee_block); | 2550 | allocated = ee_len - (map->m_lblk - ee_block); |
2654 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | 2551 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); |
2655 | 2552 | ||
2656 | ex2 = ex; | 2553 | ex2 = ex; |
2657 | orig_ex.ee_block = ex->ee_block; | 2554 | orig_ex.ee_block = ex->ee_block; |
2658 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2555 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2659 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2556 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); |
2660 | 2557 | ||
2661 | /* | 2558 | /* |
2662 | * It is safe to convert extent to initialized via explicit | 2559 | * It is safe to convert extent to initialized via explicit |
@@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2675 | /* update the extent length and mark as initialized */ | 2572 | /* update the extent length and mark as initialized */ |
2676 | ex->ee_block = orig_ex.ee_block; | 2573 | ex->ee_block = orig_ex.ee_block; |
2677 | ex->ee_len = orig_ex.ee_len; | 2574 | ex->ee_len = orig_ex.ee_len; |
2678 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2575 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2679 | ext4_ext_dirty(handle, inode, path + depth); | 2576 | ext4_ext_dirty(handle, inode, path + depth); |
2680 | /* zeroed the full extent */ | 2577 | /* zeroed the full extent */ |
2681 | return allocated; | 2578 | return allocated; |
@@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2710 | ex->ee_block = orig_ex.ee_block; | 2607 | ex->ee_block = orig_ex.ee_block; |
2711 | ex->ee_len = cpu_to_le16(ee_len - allocated); | 2608 | ex->ee_len = cpu_to_le16(ee_len - allocated); |
2712 | ext4_ext_mark_uninitialized(ex); | 2609 | ext4_ext_mark_uninitialized(ex); |
2713 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2610 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2714 | ext4_ext_dirty(handle, inode, path + depth); | 2611 | ext4_ext_dirty(handle, inode, path + depth); |
2715 | 2612 | ||
2716 | ex3 = &newex; | 2613 | ex3 = &newex; |
@@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2725 | goto fix_extent_len; | 2622 | goto fix_extent_len; |
2726 | ex->ee_block = orig_ex.ee_block; | 2623 | ex->ee_block = orig_ex.ee_block; |
2727 | ex->ee_len = orig_ex.ee_len; | 2624 | ex->ee_len = orig_ex.ee_len; |
2728 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2625 | ext4_ext_store_pblock(ex, |
2626 | ext4_ext_pblock(&orig_ex)); | ||
2729 | ext4_ext_dirty(handle, inode, path + depth); | 2627 | ext4_ext_dirty(handle, inode, path + depth); |
2730 | /* blocks available from map->m_lblk */ | 2628 | /* blocks available from map->m_lblk */ |
2731 | return allocated; | 2629 | return allocated; |
@@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2782 | /* update the extent length and mark as initialized */ | 2680 | /* update the extent length and mark as initialized */ |
2783 | ex->ee_block = orig_ex.ee_block; | 2681 | ex->ee_block = orig_ex.ee_block; |
2784 | ex->ee_len = orig_ex.ee_len; | 2682 | ex->ee_len = orig_ex.ee_len; |
2785 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2683 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2786 | ext4_ext_dirty(handle, inode, path + depth); | 2684 | ext4_ext_dirty(handle, inode, path + depth); |
2787 | /* zeroed the full extent */ | 2685 | /* zeroed the full extent */ |
2788 | /* blocks available from map->m_lblk */ | 2686 | /* blocks available from map->m_lblk */ |
@@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2833 | /* update the extent length and mark as initialized */ | 2731 | /* update the extent length and mark as initialized */ |
2834 | ex->ee_block = orig_ex.ee_block; | 2732 | ex->ee_block = orig_ex.ee_block; |
2835 | ex->ee_len = orig_ex.ee_len; | 2733 | ex->ee_len = orig_ex.ee_len; |
2836 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2734 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2837 | ext4_ext_dirty(handle, inode, path + depth); | 2735 | ext4_ext_dirty(handle, inode, path + depth); |
2838 | /* zero out the first half */ | 2736 | /* zero out the first half */ |
2839 | /* blocks available from map->m_lblk */ | 2737 | /* blocks available from map->m_lblk */ |
@@ -2902,7 +2800,7 @@ insert: | |||
2902 | /* update the extent length and mark as initialized */ | 2800 | /* update the extent length and mark as initialized */ |
2903 | ex->ee_block = orig_ex.ee_block; | 2801 | ex->ee_block = orig_ex.ee_block; |
2904 | ex->ee_len = orig_ex.ee_len; | 2802 | ex->ee_len = orig_ex.ee_len; |
2905 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2803 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2906 | ext4_ext_dirty(handle, inode, path + depth); | 2804 | ext4_ext_dirty(handle, inode, path + depth); |
2907 | /* zero out the first half */ | 2805 | /* zero out the first half */ |
2908 | return allocated; | 2806 | return allocated; |
@@ -2915,7 +2813,7 @@ out: | |||
2915 | fix_extent_len: | 2813 | fix_extent_len: |
2916 | ex->ee_block = orig_ex.ee_block; | 2814 | ex->ee_block = orig_ex.ee_block; |
2917 | ex->ee_len = orig_ex.ee_len; | 2815 | ex->ee_len = orig_ex.ee_len; |
2918 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2816 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2919 | ext4_ext_mark_uninitialized(ex); | 2817 | ext4_ext_mark_uninitialized(ex); |
2920 | ext4_ext_dirty(handle, inode, path + depth); | 2818 | ext4_ext_dirty(handle, inode, path + depth); |
2921 | return err; | 2819 | return err; |
@@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2973 | ee_block = le32_to_cpu(ex->ee_block); | 2871 | ee_block = le32_to_cpu(ex->ee_block); |
2974 | ee_len = ext4_ext_get_actual_len(ex); | 2872 | ee_len = ext4_ext_get_actual_len(ex); |
2975 | allocated = ee_len - (map->m_lblk - ee_block); | 2873 | allocated = ee_len - (map->m_lblk - ee_block); |
2976 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | 2874 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); |
2977 | 2875 | ||
2978 | ex2 = ex; | 2876 | ex2 = ex; |
2979 | orig_ex.ee_block = ex->ee_block; | 2877 | orig_ex.ee_block = ex->ee_block; |
2980 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2878 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2981 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2879 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); |
2982 | 2880 | ||
2983 | /* | 2881 | /* |
2984 | * It is safe to convert extent to initialized via explicit | 2882 | * It is safe to convert extent to initialized via explicit |
@@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3027 | /* update the extent length and mark as initialized */ | 2925 | /* update the extent length and mark as initialized */ |
3028 | ex->ee_block = orig_ex.ee_block; | 2926 | ex->ee_block = orig_ex.ee_block; |
3029 | ex->ee_len = orig_ex.ee_len; | 2927 | ex->ee_len = orig_ex.ee_len; |
3030 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2928 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3031 | ext4_ext_dirty(handle, inode, path + depth); | 2929 | ext4_ext_dirty(handle, inode, path + depth); |
3032 | /* zeroed the full extent */ | 2930 | /* zeroed the full extent */ |
3033 | /* blocks available from map->m_lblk */ | 2931 | /* blocks available from map->m_lblk */ |
@@ -3099,7 +2997,7 @@ insert: | |||
3099 | /* update the extent length and mark as initialized */ | 2997 | /* update the extent length and mark as initialized */ |
3100 | ex->ee_block = orig_ex.ee_block; | 2998 | ex->ee_block = orig_ex.ee_block; |
3101 | ex->ee_len = orig_ex.ee_len; | 2999 | ex->ee_len = orig_ex.ee_len; |
3102 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3000 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3103 | ext4_ext_dirty(handle, inode, path + depth); | 3001 | ext4_ext_dirty(handle, inode, path + depth); |
3104 | /* zero out the first half */ | 3002 | /* zero out the first half */ |
3105 | return allocated; | 3003 | return allocated; |
@@ -3112,7 +3010,7 @@ out: | |||
3112 | fix_extent_len: | 3010 | fix_extent_len: |
3113 | ex->ee_block = orig_ex.ee_block; | 3011 | ex->ee_block = orig_ex.ee_block; |
3114 | ex->ee_len = orig_ex.ee_len; | 3012 | ex->ee_len = orig_ex.ee_len; |
3115 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3013 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3116 | ext4_ext_mark_uninitialized(ex); | 3014 | ext4_ext_mark_uninitialized(ex); |
3117 | ext4_ext_dirty(handle, inode, path + depth); | 3015 | ext4_ext_dirty(handle, inode, path + depth); |
3118 | return err; | 3016 | return err; |
@@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, | |||
3180 | unmap_underlying_metadata(bdev, block + i); | 3078 | unmap_underlying_metadata(bdev, block + i); |
3181 | } | 3079 | } |
3182 | 3080 | ||
3081 | /* | ||
3082 | * Handle EOFBLOCKS_FL flag, clearing it if necessary | ||
3083 | */ | ||
3084 | static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | ||
3085 | struct ext4_map_blocks *map, | ||
3086 | struct ext4_ext_path *path, | ||
3087 | unsigned int len) | ||
3088 | { | ||
3089 | int i, depth; | ||
3090 | struct ext4_extent_header *eh; | ||
3091 | struct ext4_extent *ex, *last_ex; | ||
3092 | |||
3093 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
3094 | return 0; | ||
3095 | |||
3096 | depth = ext_depth(inode); | ||
3097 | eh = path[depth].p_hdr; | ||
3098 | ex = path[depth].p_ext; | ||
3099 | |||
3100 | if (unlikely(!eh->eh_entries)) { | ||
3101 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " | ||
3102 | "EOFBLOCKS_FL set"); | ||
3103 | return -EIO; | ||
3104 | } | ||
3105 | last_ex = EXT_LAST_EXTENT(eh); | ||
3106 | /* | ||
3107 | * We should clear the EOFBLOCKS_FL flag if we are writing the | ||
3108 | * last block in the last extent in the file. We test this by | ||
3109 | * first checking to see if the caller to | ||
3110 | * ext4_ext_get_blocks() was interested in the last block (or | ||
3111 | * a block beyond the last block) in the current extent. If | ||
3112 | * this turns out to be false, we can bail out from this | ||
3113 | * function immediately. | ||
3114 | */ | ||
3115 | if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) + | ||
3116 | ext4_ext_get_actual_len(last_ex)) | ||
3117 | return 0; | ||
3118 | /* | ||
3119 | * If the caller does appear to be planning to write at or | ||
3120 | * beyond the end of the current extent, we then test to see | ||
3121 | * if the current extent is the last extent in the file, by | ||
3122 | * checking to make sure it was reached via the rightmost node | ||
3123 | * at each level of the tree. | ||
3124 | */ | ||
3125 | for (i = depth-1; i >= 0; i--) | ||
3126 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3127 | return 0; | ||
3128 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3129 | return ext4_mark_inode_dirty(handle, inode); | ||
3130 | } | ||
3131 | |||
3183 | static int | 3132 | static int |
3184 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3133 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3185 | struct ext4_map_blocks *map, | 3134 | struct ext4_map_blocks *map, |
@@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3206 | * completed | 3155 | * completed |
3207 | */ | 3156 | */ |
3208 | if (io) | 3157 | if (io) |
3209 | io->flag = EXT4_IO_UNWRITTEN; | 3158 | io->flag = EXT4_IO_END_UNWRITTEN; |
3210 | else | 3159 | else |
3211 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3160 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3212 | if (ext4_should_dioread_nolock(inode)) | 3161 | if (ext4_should_dioread_nolock(inode)) |
@@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3217 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { | 3166 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { |
3218 | ret = ext4_convert_unwritten_extents_endio(handle, inode, | 3167 | ret = ext4_convert_unwritten_extents_endio(handle, inode, |
3219 | path); | 3168 | path); |
3220 | if (ret >= 0) | 3169 | if (ret >= 0) { |
3221 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3170 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3171 | err = check_eofblocks_fl(handle, inode, map, path, | ||
3172 | map->m_len); | ||
3173 | } else | ||
3174 | err = ret; | ||
3222 | goto out2; | 3175 | goto out2; |
3223 | } | 3176 | } |
3224 | /* buffered IO case */ | 3177 | /* buffered IO case */ |
@@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3244 | 3197 | ||
3245 | /* buffered write, writepage time, convert*/ | 3198 | /* buffered write, writepage time, convert*/ |
3246 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); | 3199 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
3247 | if (ret >= 0) | 3200 | if (ret >= 0) { |
3248 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3201 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3202 | err = check_eofblocks_fl(handle, inode, map, path, map->m_len); | ||
3203 | if (err < 0) | ||
3204 | goto out2; | ||
3205 | } | ||
3206 | |||
3249 | out: | 3207 | out: |
3250 | if (ret <= 0) { | 3208 | if (ret <= 0) { |
3251 | err = ret; | 3209 | err = ret; |
@@ -3292,6 +3250,7 @@ out2: | |||
3292 | } | 3250 | } |
3293 | return err ? err : allocated; | 3251 | return err ? err : allocated; |
3294 | } | 3252 | } |
3253 | |||
3295 | /* | 3254 | /* |
3296 | * Block allocation/map/preallocation routine for extents based files | 3255 | * Block allocation/map/preallocation routine for extents based files |
3297 | * | 3256 | * |
@@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3315 | { | 3274 | { |
3316 | struct ext4_ext_path *path = NULL; | 3275 | struct ext4_ext_path *path = NULL; |
3317 | struct ext4_extent_header *eh; | 3276 | struct ext4_extent_header *eh; |
3318 | struct ext4_extent newex, *ex, *last_ex; | 3277 | struct ext4_extent newex, *ex; |
3319 | ext4_fsblk_t newblock; | 3278 | ext4_fsblk_t newblock; |
3320 | int i, err = 0, depth, ret, cache_type; | 3279 | int err = 0, depth, ret, cache_type; |
3321 | unsigned int allocated = 0; | 3280 | unsigned int allocated = 0; |
3322 | struct ext4_allocation_request ar; | 3281 | struct ext4_allocation_request ar; |
3323 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3282 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
@@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3341 | /* block is already allocated */ | 3300 | /* block is already allocated */ |
3342 | newblock = map->m_lblk | 3301 | newblock = map->m_lblk |
3343 | - le32_to_cpu(newex.ee_block) | 3302 | - le32_to_cpu(newex.ee_block) |
3344 | + ext_pblock(&newex); | 3303 | + ext4_ext_pblock(&newex); |
3345 | /* number of remaining blocks in the extent */ | 3304 | /* number of remaining blocks in the extent */ |
3346 | allocated = ext4_ext_get_actual_len(&newex) - | 3305 | allocated = ext4_ext_get_actual_len(&newex) - |
3347 | (map->m_lblk - le32_to_cpu(newex.ee_block)); | 3306 | (map->m_lblk - le32_to_cpu(newex.ee_block)); |
@@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3379 | ex = path[depth].p_ext; | 3338 | ex = path[depth].p_ext; |
3380 | if (ex) { | 3339 | if (ex) { |
3381 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 3340 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
3382 | ext4_fsblk_t ee_start = ext_pblock(ex); | 3341 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
3383 | unsigned short ee_len; | 3342 | unsigned short ee_len; |
3384 | 3343 | ||
3385 | /* | 3344 | /* |
@@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3488 | */ | 3447 | */ |
3489 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3448 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3490 | if (io) | 3449 | if (io) |
3491 | io->flag = EXT4_IO_UNWRITTEN; | 3450 | io->flag = EXT4_IO_END_UNWRITTEN; |
3492 | else | 3451 | else |
3493 | ext4_set_inode_state(inode, | 3452 | ext4_set_inode_state(inode, |
3494 | EXT4_STATE_DIO_UNWRITTEN); | 3453 | EXT4_STATE_DIO_UNWRITTEN); |
@@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3497 | map->m_flags |= EXT4_MAP_UNINIT; | 3456 | map->m_flags |= EXT4_MAP_UNINIT; |
3498 | } | 3457 | } |
3499 | 3458 | ||
3500 | if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { | 3459 | err = check_eofblocks_fl(handle, inode, map, path, ar.len); |
3501 | if (unlikely(!eh->eh_entries)) { | 3460 | if (err) |
3502 | EXT4_ERROR_INODE(inode, | 3461 | goto out2; |
3503 | "eh->eh_entries == 0 and " | 3462 | |
3504 | "EOFBLOCKS_FL set"); | ||
3505 | err = -EIO; | ||
3506 | goto out2; | ||
3507 | } | ||
3508 | last_ex = EXT_LAST_EXTENT(eh); | ||
3509 | /* | ||
3510 | * If the current leaf block was reached by looking at | ||
3511 | * the last index block all the way down the tree, and | ||
3512 | * we are extending the inode beyond the last extent | ||
3513 | * in the current leaf block, then clear the | ||
3514 | * EOFBLOCKS_FL flag. | ||
3515 | */ | ||
3516 | for (i = depth-1; i >= 0; i--) { | ||
3517 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3518 | break; | ||
3519 | } | ||
3520 | if ((i < 0) && | ||
3521 | (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + | ||
3522 | ext4_ext_get_actual_len(last_ex))) | ||
3523 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3524 | } | ||
3525 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3463 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3526 | if (err) { | 3464 | if (err) { |
3527 | /* free data blocks we just allocated */ | 3465 | /* free data blocks we just allocated */ |
3528 | /* not a good idea to call discard here directly, | 3466 | /* not a good idea to call discard here directly, |
3529 | * but otherwise we'd need to call it every free() */ | 3467 | * but otherwise we'd need to call it every free() */ |
3530 | ext4_discard_preallocations(inode); | 3468 | ext4_discard_preallocations(inode); |
3531 | ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), | 3469 | ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), |
3532 | ext4_ext_get_actual_len(&newex), 0); | 3470 | ext4_ext_get_actual_len(&newex), 0); |
3533 | goto out2; | 3471 | goto out2; |
3534 | } | 3472 | } |
3535 | 3473 | ||
3536 | /* previous routine could use block we allocated */ | 3474 | /* previous routine could use block we allocated */ |
3537 | newblock = ext_pblock(&newex); | 3475 | newblock = ext4_ext_pblock(&newex); |
3538 | allocated = ext4_ext_get_actual_len(&newex); | 3476 | allocated = ext4_ext_get_actual_len(&newex); |
3539 | if (allocated > map->m_len) | 3477 | if (allocated > map->m_len) |
3540 | allocated = map->m_len; | 3478 | allocated = map->m_len; |
@@ -3729,7 +3667,7 @@ retry: | |||
3729 | printk(KERN_ERR "%s: ext4_ext_map_blocks " | 3667 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3730 | "returned error inode#%lu, block=%u, " | 3668 | "returned error inode#%lu, block=%u, " |
3731 | "max_blocks=%u", __func__, | 3669 | "max_blocks=%u", __func__, |
3732 | inode->i_ino, block, max_blocks); | 3670 | inode->i_ino, map.m_lblk, max_blocks); |
3733 | #endif | 3671 | #endif |
3734 | ext4_mark_inode_dirty(handle, inode); | 3672 | ext4_mark_inode_dirty(handle, inode); |
3735 | ret2 = ext4_journal_stop(handle); | 3673 | ret2 = ext4_journal_stop(handle); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ee92b66d4558..5a5c55ddceef 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
130 | return dquot_file_open(inode, filp); | 130 | return dquot_file_open(inode, filp); |
131 | } | 131 | } |
132 | 132 | ||
133 | /* | ||
134 | * ext4_llseek() is a copy of generic_file_llseek() that limits the offset to | ||
135 | * s_bitmap_maxbytes for block-mapped inodes and s_maxbytes for extent-mapped | ||
136 | * ones. Holds i_mutex; returns the new position or -EINVAL if out of range. | ||
137 | */ | ||
138 | loff_t ext4_llseek(struct file *file, loff_t offset, int origin) | ||
139 | { | ||
140 | struct inode *inode = file->f_mapping->host; | ||
141 | loff_t maxbytes; | ||
142 | /* Pick the offset limit that matches how this inode maps its blocks. */ | ||
143 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | ||
144 | maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; | ||
145 | else | ||
146 | maxbytes = inode->i_sb->s_maxbytes; | ||
147 | mutex_lock(&inode->i_mutex); | ||
148 | switch (origin) { | ||
149 | case SEEK_END: | ||
150 | offset += inode->i_size; | ||
151 | break; | ||
152 | case SEEK_CUR: | ||
153 | if (offset == 0) { | ||
154 | mutex_unlock(&inode->i_mutex); | ||
155 | return file->f_pos; | ||
156 | } | ||
157 | offset += file->f_pos; | ||
158 | break; | ||
159 | } | ||
160 | /* Any other origin leaves offset as passed in; now range-check the result. */ | ||
161 | if (offset < 0 || offset > maxbytes) { | ||
162 | mutex_unlock(&inode->i_mutex); | ||
163 | return -EINVAL; | ||
164 | } | ||
165 | |||
166 | if (offset != file->f_pos) { | ||
167 | file->f_pos = offset; | ||
168 | file->f_version = 0; | ||
169 | } | ||
170 | mutex_unlock(&inode->i_mutex); | ||
171 | |||
172 | return offset; | ||
173 | } | ||
174 | |||
133 | const struct file_operations ext4_file_operations = { | 175 | const struct file_operations ext4_file_operations = { |
134 | .llseek = generic_file_llseek, | 176 | .llseek = ext4_llseek, |
135 | .read = do_sync_read, | 177 | .read = do_sync_read, |
136 | .write = do_sync_write, | 178 | .write = do_sync_write, |
137 | .aio_read = generic_file_aio_read, | 179 | .aio_read = generic_file_aio_read, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 3f3ff5ee8f9d..c1a7bc923cf6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -34,6 +34,89 @@ | |||
34 | 34 | ||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | static void dump_completed_IO(struct inode * inode) | ||
38 | { | ||
39 | #ifdef EXT4_DEBUG | ||
40 | struct list_head *cur, *before, *after; | ||
41 | ext4_io_end_t *io, *io0, *io1; | ||
42 | unsigned long flags; | ||
43 | /* Debug helper: log every entry of the inode's completed-IO list. */ | ||
44 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
45 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
46 | return; | ||
47 | } | ||
48 | /* The emptiness test above is unlocked; the walk below holds the lock. */ | ||
49 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
50 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
51 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
52 | cur = &io->list; | ||
53 | before = cur->prev; | ||
54 | io0 = container_of(before, ext4_io_end_t, list); | ||
55 | after = cur->next; | ||
56 | io1 = container_of(after, ext4_io_end_t, list); | ||
57 | /* Printed only: at the list ends io0/io1 derive from the list head, not an io. */ | ||
58 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
59 | io, inode->i_ino, io0, io1); | ||
60 | } | ||
61 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
62 | #endif /* EXT4_DEBUG */ | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This function is called from ext4_sync_file(). | ||
67 | * | ||
68 | * When IO is completed, the work to convert unwritten extents to | ||
69 | * written is queued on a workqueue but may not get scheduled | ||
70 | * immediately. When fsync is called, we need to ensure the | ||
71 | * conversion is complete before fsync returns. | ||
72 | * The inode keeps a list of completed IO that might need the | ||
73 | * conversion. This function walks that list and calls | ||
74 | * ext4_end_io_nolock() on each entry, dropping i_completed_io_lock | ||
75 | * around the call. | ||
76 | * Returns 0 on success, or the last negative error reported for an entry. | ||
77 | */ | ||
78 | static int flush_completed_IO(struct inode *inode) | ||
79 | { | ||
80 | ext4_io_end_t *io; | ||
81 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
82 | unsigned long flags; | ||
83 | int ret = 0; | ||
84 | int ret2 = 0; | ||
85 | |||
86 | if (list_empty(&ei->i_completed_io_list)) | ||
87 | return ret; | ||
88 | |||
89 | dump_completed_IO(inode); | ||
90 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
91 | while (!list_empty(&ei->i_completed_io_list)){ | ||
92 | io = list_entry(ei->i_completed_io_list.next, | ||
93 | ext4_io_end_t, list); | ||
94 | /* | ||
95 | * Call ext4_end_io_nolock() to convert completed | ||
96 | * IO to written. | ||
97 | * | ||
98 | * When ext4_sync_file() is called, run_queue() may already be | ||
99 | * about to flush the work corresponding to this io structure. | ||
100 | * It will be upset if it finds that the io structure related | ||
101 | * to the work about to be scheduled has been freed. | ||
102 | * | ||
103 | * Thus we only unlink the io structure here and keep it valid | ||
104 | * after the conversion has finished. The io structure has a flag | ||
105 | * to avoid double conversion by both fsync and the background work. | ||
106 | * NOTE(review): a failed io stays at the list head and is retried - confirm. | ||
107 | */ | ||
108 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
109 | ret = ext4_end_io_nolock(io); | ||
110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
111 | if (ret < 0) | ||
112 | ret2 = ret; | ||
113 | else | ||
114 | list_del_init(&io->list); | ||
115 | } | ||
116 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
117 | return (ret2 < 0) ? ret2 : 0; | ||
118 | } | ||
119 | |||
37 | /* | 120 | /* |
38 | * If we're not journaling and this is a just-created file, we have to | 121 | * If we're not journaling and this is a just-created file, we have to |
39 | * sync our parent directory (if it was freshly created) since | 122 | * sync our parent directory (if it was freshly created) since |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 45853e0d1f21..1ce240a23ebb 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -50,7 +50,7 @@ | |||
50 | * need to use it within a single byte (to ensure we get endianness right). | 50 | * need to use it within a single byte (to ensure we get endianness right). |
51 | * We can use memset for the rest of the bitmap as there are no other users. | 51 | * We can use memset for the rest of the bitmap as there are no other users. |
52 | */ | 52 | */ |
53 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | 53 | void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) |
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | 56 | ||
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
65 | } | 65 | } |
66 | 66 | ||
67 | /* Initializes an uninitialized inode bitmap */ | 67 | /* Initializes an uninitialized inode bitmap */ |
68 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | 68 | static unsigned ext4_init_inode_bitmap(struct super_block *sb, |
69 | ext4_group_t block_group, | 69 | struct buffer_head *bh, |
70 | struct ext4_group_desc *gdp) | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | ||
71 | { | 72 | { |
72 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 73 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
73 | 74 | ||
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
85 | } | 86 | } |
86 | 87 | ||
87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
89 | bh->b_data); | 90 | bh->b_data); |
90 | 91 | ||
91 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
107 | desc = ext4_get_group_desc(sb, block_group, NULL); | 108 | desc = ext4_get_group_desc(sb, block_group, NULL); |
108 | if (!desc) | 109 | if (!desc) |
109 | return NULL; | 110 | return NULL; |
111 | |||
110 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 112 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
111 | bh = sb_getblk(sb, bitmap_blk); | 113 | bh = sb_getblk(sb, bitmap_blk); |
112 | if (unlikely(!bh)) { | 114 | if (unlikely(!bh)) { |
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 125 | unlock_buffer(bh); |
124 | return bh; | 126 | return bh; |
125 | } | 127 | } |
128 | |||
126 | ext4_lock_group(sb, block_group); | 129 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 130 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 131 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
133 | return bh; | 136 | return bh; |
134 | } | 137 | } |
135 | ext4_unlock_group(sb, block_group); | 138 | ext4_unlock_group(sb, block_group); |
139 | |||
136 | if (buffer_uptodate(bh)) { | 140 | if (buffer_uptodate(bh)) { |
137 | /* | 141 | /* |
138 | * if not uninit if bh is uptodate, | 142 | * if not uninit if bh is uptodate, |
@@ -411,8 +415,8 @@ struct orlov_stats { | |||
411 | * for a particular block group or flex_bg. If flex_size is 1, then g | 415 | * for a particular block group or flex_bg. If flex_size is 1, then g |
412 | * is a block group number; otherwise it is flex_bg number. | 416 | * is a block group number; otherwise it is flex_bg number. |
413 | */ | 417 | */ |
414 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | 418 | static void get_orlov_stats(struct super_block *sb, ext4_group_t g, |
415 | int flex_size, struct orlov_stats *stats) | 419 | int flex_size, struct orlov_stats *stats) |
416 | { | 420 | { |
417 | struct ext4_group_desc *desc; | 421 | struct ext4_group_desc *desc; |
418 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; | 422 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; |
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb, | |||
712 | { | 716 | { |
713 | int free = 0, retval = 0, count; | 717 | int free = 0, retval = 0, count; |
714 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 718 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
719 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
715 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 720 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
716 | 721 | ||
722 | /* | ||
723 | * We have to be sure that new inode allocation does not race with | ||
724 | * inode table initialization, because otherwise we may end up | ||
725 | * allocating and writing new inode right before sb_issue_zeroout | ||
726 | * takes place and overwriting our new inode with zeroes. So we | ||
727 | * take alloc_sem to prevent it. | ||
728 | */ | ||
729 | down_read(&grp->alloc_sem); | ||
717 | ext4_lock_group(sb, group); | 730 | ext4_lock_group(sb, group); |
718 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 731 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
719 | /* not a free inode */ | 732 | /* not a free inode */ |
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
724 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 737 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
725 | ino > EXT4_INODES_PER_GROUP(sb)) { | 738 | ino > EXT4_INODES_PER_GROUP(sb)) { |
726 | ext4_unlock_group(sb, group); | 739 | ext4_unlock_group(sb, group); |
740 | up_read(&grp->alloc_sem); | ||
727 | ext4_error(sb, "reserved inode or inode > inodes count - " | 741 | ext4_error(sb, "reserved inode or inode > inodes count - " |
728 | "block_group = %u, inode=%lu", group, | 742 | "block_group = %u, inode=%lu", group, |
729 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 743 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
772 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
773 | err_ret: | 787 | err_ret: |
774 | ext4_unlock_group(sb, group); | 788 | ext4_unlock_group(sb, group); |
789 | up_read(&grp->alloc_sem); | ||
775 | return retval; | 790 | return retval; |
776 | } | 791 | } |
777 | 792 | ||
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1205 | } | 1220 | } |
1206 | return count; | 1221 | return count; |
1207 | } | 1222 | } |
1223 | |||
1224 | /* | ||
1225 | * Zeroes the not yet zeroed part of a group's inode table by writing zeroes | ||
1226 | * over it. Must be called without any spinlock held. On an active filesystem | ||
1227 | * the only caller is the ext4lazyinit thread, so the only lock we need is | ||
1228 | * alloc_sem, taken for write to block ext4_claim_inode() in this group until | ||
1229 | * we are finished. Returns 1 on a read-only fs or inconsistent itable | ||
1230 | * counts, the error from a failing helper, and 0 otherwise. | ||
1231 | */ | ||
1232 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | ||
1233 | int barrier) | ||
1234 | { | ||
1235 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
1236 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1237 | struct ext4_group_desc *gdp = NULL; | ||
1238 | struct buffer_head *group_desc_bh; | ||
1239 | handle_t *handle; | ||
1240 | ext4_fsblk_t blk; | ||
1241 | int num, ret = 0, used_blks = 0; | ||
1242 | |||
1243 | /* This should not happen, but just to be sure check this */ | ||
1244 | if (sb->s_flags & MS_RDONLY) { | ||
1245 | ret = 1; | ||
1246 | goto out; | ||
1247 | } | ||
1248 | |||
1249 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | ||
1250 | if (!gdp) | ||
1251 | goto out; | ||
1252 | |||
1253 | /* | ||
1254 | * We do not need to lock this, because we are the only one | ||
1255 | * handling this flag. | ||
1256 | */ | ||
1257 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) | ||
1258 | goto out; | ||
1259 | |||
1260 | handle = ext4_journal_start_sb(sb, 1); | ||
1261 | if (IS_ERR(handle)) { | ||
1262 | ret = PTR_ERR(handle); | ||
1263 | goto out; | ||
1264 | } | ||
1265 | |||
1266 | down_write(&grp->alloc_sem); | ||
1267 | /* | ||
1268 | * If inode bitmap was already initialized there may be some | ||
1269 | * used inodes so we need to skip blocks with used inodes in | ||
1270 | * inode table. | ||
1271 | */ | ||
1272 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
1273 | used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - | ||
1274 | ext4_itable_unused_count(sb, gdp)), | ||
1275 | sbi->s_inodes_per_block); | ||
1276 | |||
1277 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | ||
1278 | ext4_error(sb, "Something is wrong with group %u\n" | ||
1279 | "Used itable blocks: %d, " | ||
1280 | "itable unused count: %u\n", | ||
1281 | group, used_blks, | ||
1282 | ext4_itable_unused_count(sb, gdp)); | ||
1283 | ret = 1; | ||
1284 | goto err_out; | ||
1285 | } | ||
1286 | |||
1287 | blk = ext4_inode_table(sb, gdp) + used_blks; | ||
1288 | num = sbi->s_itb_per_group - used_blks; | ||
1289 | |||
1290 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
1291 | ret = ext4_journal_get_write_access(handle, | ||
1292 | group_desc_bh); | ||
1293 | if (ret) | ||
1294 | goto err_out; | ||
1295 | |||
1296 | /* | ||
1297 | * Skip zeroout if the inode table is full. But we set the ZEROED | ||
1298 | * flag anyway, because obviously, when it is full it does not need | ||
1299 | * further zeroing. | ||
1300 | */ | ||
1301 | if (unlikely(num == 0)) | ||
1302 | goto skip_zeroout; | ||
1303 | |||
1304 | ext4_debug("going to zero out inode table in group %d\n", | ||
1305 | group); | ||
1306 | ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); | ||
1307 | if (ret < 0) | ||
1308 | goto err_out; | ||
1309 | if (barrier) | ||
1310 | blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); | ||
1311 | |||
1312 | skip_zeroout: | ||
1313 | ext4_lock_group(sb, group); | ||
1314 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
1315 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1316 | ext4_unlock_group(sb, group); | ||
1317 | |||
1318 | BUFFER_TRACE(group_desc_bh, | ||
1319 | "call ext4_handle_dirty_metadata"); | ||
1320 | ret = ext4_handle_dirty_metadata(handle, NULL, | ||
1321 | group_desc_bh); | ||
1322 | /* All exits taken while holding alloc_sem and the handle must land here. */ | ||
1323 | err_out: | ||
1324 | up_write(&grp->alloc_sem); | ||
1325 | ext4_journal_stop(handle); | ||
1326 | out: | ||
1327 | return ret; | ||
1328 | } | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4b8debeb3965..bdbe69902207 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -53,6 +53,7 @@ | |||
53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
54 | loff_t new_size) | 54 | loff_t new_size) |
55 | { | 55 | { |
56 | trace_ext4_begin_ordered_truncate(inode, new_size); | ||
56 | return jbd2_journal_begin_ordered_truncate( | 57 | return jbd2_journal_begin_ordered_truncate( |
57 | EXT4_SB(inode->i_sb)->s_journal, | 58 | EXT4_SB(inode->i_sb)->s_journal, |
58 | &EXT4_I(inode)->jinode, | 59 | &EXT4_I(inode)->jinode, |
@@ -60,6 +61,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
60 | } | 61 | } |
61 | 62 | ||
62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 63 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
64 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
65 | struct buffer_head *bh_result, int create); | ||
66 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
67 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
68 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | ||
69 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | ||
63 | 70 | ||
64 | /* | 71 | /* |
65 | * Test whether an inode is a fast symlink. | 72 | * Test whether an inode is a fast symlink. |
@@ -172,6 +179,7 @@ void ext4_evict_inode(struct inode *inode) | |||
172 | handle_t *handle; | 179 | handle_t *handle; |
173 | int err; | 180 | int err; |
174 | 181 | ||
182 | trace_ext4_evict_inode(inode); | ||
175 | if (inode->i_nlink) { | 183 | if (inode->i_nlink) { |
176 | truncate_inode_pages(&inode->i_data, 0); | 184 | truncate_inode_pages(&inode->i_data, 0); |
177 | goto no_delete; | 185 | goto no_delete; |
@@ -755,6 +763,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
755 | * parent to disk. | 763 | * parent to disk. |
756 | */ | 764 | */ |
757 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 765 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
766 | if (unlikely(!bh)) { | ||
767 | err = -EIO; | ||
768 | goto failed; | ||
769 | } | ||
770 | |||
758 | branch[n].bh = bh; | 771 | branch[n].bh = bh; |
759 | lock_buffer(bh); | 772 | lock_buffer(bh); |
760 | BUFFER_TRACE(bh, "call get_create_access"); | 773 | BUFFER_TRACE(bh, "call get_create_access"); |
@@ -1207,8 +1220,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1207 | break; | 1220 | break; |
1208 | idx++; | 1221 | idx++; |
1209 | num++; | 1222 | num++; |
1210 | if (num >= max_pages) | 1223 | if (num >= max_pages) { |
1224 | done = 1; | ||
1211 | break; | 1225 | break; |
1226 | } | ||
1212 | } | 1227 | } |
1213 | pagevec_release(&pvec); | 1228 | pagevec_release(&pvec); |
1214 | } | 1229 | } |
@@ -1538,10 +1553,10 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1553 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1539 | return 0; | 1554 | return 0; |
1540 | /* | 1555 | /* |
1541 | * __block_prepare_write() could have dirtied some buffers. Clean | 1556 | * __block_write_begin() could have dirtied some buffers. Clean |
1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | 1557 | * the dirty bit as jbd2_journal_get_write_access() could complain |
1543 | * otherwise about fs integrity issues. Setting of the dirty bit | 1558 | * otherwise about fs integrity issues. Setting of the dirty bit |
1544 | * by __block_prepare_write() isn't a real problem here as we clear | 1559 | * by __block_write_begin() isn't a real problem here as we clear |
1545 | * the bit before releasing a page lock and thus writeback cannot | 1560 | * the bit before releasing a page lock and thus writeback cannot |
1546 | * ever write the buffer. | 1561 | * ever write the buffer. |
1547 | */ | 1562 | */ |
@@ -1995,16 +2010,23 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1995 | * | 2010 | * |
1996 | * As pages are already locked by write_cache_pages(), we can't use it | 2011 | * As pages are already locked by write_cache_pages(), we can't use it |
1997 | */ | 2012 | */ |
1998 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 2013 | static int mpage_da_submit_io(struct mpage_da_data *mpd, |
2014 | struct ext4_map_blocks *map) | ||
1999 | { | 2015 | { |
2000 | long pages_skipped; | ||
2001 | struct pagevec pvec; | 2016 | struct pagevec pvec; |
2002 | unsigned long index, end; | 2017 | unsigned long index, end; |
2003 | int ret = 0, err, nr_pages, i; | 2018 | int ret = 0, err, nr_pages, i; |
2004 | struct inode *inode = mpd->inode; | 2019 | struct inode *inode = mpd->inode; |
2005 | struct address_space *mapping = inode->i_mapping; | 2020 | struct address_space *mapping = inode->i_mapping; |
2021 | loff_t size = i_size_read(inode); | ||
2022 | unsigned int len, block_start; | ||
2023 | struct buffer_head *bh, *page_bufs = NULL; | ||
2024 | int journal_data = ext4_should_journal_data(inode); | ||
2025 | sector_t pblock = 0, cur_logical = 0; | ||
2026 | struct ext4_io_submit io_submit; | ||
2006 | 2027 | ||
2007 | BUG_ON(mpd->next_page <= mpd->first_page); | 2028 | BUG_ON(mpd->next_page <= mpd->first_page); |
2029 | memset(&io_submit, 0, sizeof(io_submit)); | ||
2008 | /* | 2030 | /* |
2009 | * We need to start from the first_page to the next_page - 1 | 2031 | * We need to start from the first_page to the next_page - 1 |
2010 | * to make sure we also write the mapped dirty buffer_heads. | 2032 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -2020,122 +2042,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2020 | if (nr_pages == 0) | 2042 | if (nr_pages == 0) |
2021 | break; | 2043 | break; |
2022 | for (i = 0; i < nr_pages; i++) { | 2044 | for (i = 0; i < nr_pages; i++) { |
2045 | int commit_write = 0, redirty_page = 0; | ||
2023 | struct page *page = pvec.pages[i]; | 2046 | struct page *page = pvec.pages[i]; |
2024 | 2047 | ||
2025 | index = page->index; | 2048 | index = page->index; |
2026 | if (index > end) | 2049 | if (index > end) |
2027 | break; | 2050 | break; |
2051 | |||
2052 | if (index == size >> PAGE_CACHE_SHIFT) | ||
2053 | len = size & ~PAGE_CACHE_MASK; | ||
2054 | else | ||
2055 | len = PAGE_CACHE_SIZE; | ||
2056 | if (map) { | ||
2057 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
2058 | inode->i_blkbits); | ||
2059 | pblock = map->m_pblk + (cur_logical - | ||
2060 | map->m_lblk); | ||
2061 | } | ||
2028 | index++; | 2062 | index++; |
2029 | 2063 | ||
2030 | BUG_ON(!PageLocked(page)); | 2064 | BUG_ON(!PageLocked(page)); |
2031 | BUG_ON(PageWriteback(page)); | 2065 | BUG_ON(PageWriteback(page)); |
2032 | 2066 | ||
2033 | pages_skipped = mpd->wbc->pages_skipped; | ||
2034 | err = mapping->a_ops->writepage(page, mpd->wbc); | ||
2035 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) | ||
2036 | /* | ||
2037 | * have successfully written the page | ||
2038 | * without skipping the same | ||
2039 | */ | ||
2040 | mpd->pages_written++; | ||
2041 | /* | 2067 | /* |
2042 | * In error case, we have to continue because | 2068 | * If the page does not have buffers (for |
2043 | * remaining pages are still locked | 2069 | * whatever reason), try to create them using |
2044 | * XXX: unlock and re-dirty them? | 2070 | * __block_write_begin. If this fails, |
2071 | * redirty the page and move on. | ||
2045 | */ | 2072 | */ |
2046 | if (ret == 0) | 2073 | if (!page_has_buffers(page)) { |
2047 | ret = err; | 2074 | if (__block_write_begin(page, 0, len, |
2048 | } | 2075 | noalloc_get_block_write)) { |
2049 | pagevec_release(&pvec); | 2076 | redirty_page: |
2050 | } | 2077 | redirty_page_for_writepage(mpd->wbc, |
2051 | return ret; | 2078 | page); |
2052 | } | 2079 | unlock_page(page); |
2053 | 2080 | continue; | |
2054 | /* | 2081 | } |
2055 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | 2082 | commit_write = 1; |
2056 | * | 2083 | } |
2057 | * the function goes through all passed space and put actual disk | ||
2058 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | ||
2059 | */ | ||
2060 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, | ||
2061 | struct ext4_map_blocks *map) | ||
2062 | { | ||
2063 | struct inode *inode = mpd->inode; | ||
2064 | struct address_space *mapping = inode->i_mapping; | ||
2065 | int blocks = map->m_len; | ||
2066 | sector_t pblock = map->m_pblk, cur_logical; | ||
2067 | struct buffer_head *head, *bh; | ||
2068 | pgoff_t index, end; | ||
2069 | struct pagevec pvec; | ||
2070 | int nr_pages, i; | ||
2071 | |||
2072 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2073 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2074 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2075 | |||
2076 | pagevec_init(&pvec, 0); | ||
2077 | |||
2078 | while (index <= end) { | ||
2079 | /* XXX: optimize tail */ | ||
2080 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
2081 | if (nr_pages == 0) | ||
2082 | break; | ||
2083 | for (i = 0; i < nr_pages; i++) { | ||
2084 | struct page *page = pvec.pages[i]; | ||
2085 | |||
2086 | index = page->index; | ||
2087 | if (index > end) | ||
2088 | break; | ||
2089 | index++; | ||
2090 | |||
2091 | BUG_ON(!PageLocked(page)); | ||
2092 | BUG_ON(PageWriteback(page)); | ||
2093 | BUG_ON(!page_has_buffers(page)); | ||
2094 | |||
2095 | bh = page_buffers(page); | ||
2096 | head = bh; | ||
2097 | |||
2098 | /* skip blocks out of the range */ | ||
2099 | do { | ||
2100 | if (cur_logical >= map->m_lblk) | ||
2101 | break; | ||
2102 | cur_logical++; | ||
2103 | } while ((bh = bh->b_this_page) != head); | ||
2104 | 2084 | ||
2085 | bh = page_bufs = page_buffers(page); | ||
2086 | block_start = 0; | ||
2105 | do { | 2087 | do { |
2106 | if (cur_logical >= map->m_lblk + blocks) | 2088 | if (!bh) |
2107 | break; | 2089 | goto redirty_page; |
2108 | 2090 | if (map && (cur_logical >= map->m_lblk) && | |
2109 | if (buffer_delay(bh) || buffer_unwritten(bh)) { | 2091 | (cur_logical <= (map->m_lblk + |
2110 | 2092 | (map->m_len - 1)))) { | |
2111 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | ||
2112 | |||
2113 | if (buffer_delay(bh)) { | 2093 | if (buffer_delay(bh)) { |
2114 | clear_buffer_delay(bh); | 2094 | clear_buffer_delay(bh); |
2115 | bh->b_blocknr = pblock; | 2095 | bh->b_blocknr = pblock; |
2116 | } else { | ||
2117 | /* | ||
2118 | * unwritten already should have | ||
2119 | * blocknr assigned. Verify that | ||
2120 | */ | ||
2121 | clear_buffer_unwritten(bh); | ||
2122 | BUG_ON(bh->b_blocknr != pblock); | ||
2123 | } | 2096 | } |
2097 | if (buffer_unwritten(bh) || | ||
2098 | buffer_mapped(bh)) | ||
2099 | BUG_ON(bh->b_blocknr != pblock); | ||
2100 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
2101 | set_buffer_uninit(bh); | ||
2102 | clear_buffer_unwritten(bh); | ||
2103 | } | ||
2124 | 2104 | ||
2125 | } else if (buffer_mapped(bh)) | 2105 | /* redirty page if block allocation undone */ |
2126 | BUG_ON(bh->b_blocknr != pblock); | 2106 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2127 | 2107 | redirty_page = 1; | |
2128 | if (map->m_flags & EXT4_MAP_UNINIT) | 2108 | bh = bh->b_this_page; |
2129 | set_buffer_uninit(bh); | 2109 | block_start += bh->b_size; |
2130 | cur_logical++; | 2110 | cur_logical++; |
2131 | pblock++; | 2111 | pblock++; |
2132 | } while ((bh = bh->b_this_page) != head); | 2112 | } while (bh != page_bufs); |
2113 | |||
2114 | if (redirty_page) | ||
2115 | goto redirty_page; | ||
2116 | |||
2117 | if (commit_write) | ||
2118 | /* mark the buffer_heads as dirty & uptodate */ | ||
2119 | block_commit_write(page, 0, len); | ||
2120 | |||
2121 | /* | ||
2122 | * Delalloc doesn't support data journalling, | ||
2123 | * but eventually maybe we'll lift this | ||
2124 | * restriction. | ||
2125 | */ | ||
2126 | if (unlikely(journal_data && PageChecked(page))) | ||
2127 | err = __ext4_journalled_writepage(page, len); | ||
2128 | else | ||
2129 | err = ext4_bio_write_page(&io_submit, page, | ||
2130 | len, mpd->wbc); | ||
2131 | |||
2132 | if (!err) | ||
2133 | mpd->pages_written++; | ||
2134 | /* | ||
2135 | * In error case, we have to continue because | ||
2136 | * remaining pages are still locked | ||
2137 | */ | ||
2138 | if (ret == 0) | ||
2139 | ret = err; | ||
2133 | } | 2140 | } |
2134 | pagevec_release(&pvec); | 2141 | pagevec_release(&pvec); |
2135 | } | 2142 | } |
2143 | ext4_io_submit(&io_submit); | ||
2144 | return ret; | ||
2136 | } | 2145 | } |
2137 | 2146 | ||
2138 | |||
2139 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2147 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
2140 | sector_t logical, long blk_cnt) | 2148 | sector_t logical, long blk_cnt) |
2141 | { | 2149 | { |
@@ -2187,35 +2195,32 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2187 | } | 2195 | } |
2188 | 2196 | ||
2189 | /* | 2197 | /* |
2190 | * mpage_da_map_blocks - go through given space | 2198 | * mpage_da_map_and_submit - go through given space, map them |
2199 | * if necessary, and then submit them for I/O | ||
2191 | * | 2200 | * |
2192 | * @mpd - bh describing space | 2201 | * @mpd - bh describing space |
2193 | * | 2202 | * |
2194 | * The function skips space we know is already mapped to disk blocks. | 2203 | * The function skips space we know is already mapped to disk blocks. |
2195 | * | 2204 | * |
2196 | */ | 2205 | */ |
2197 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2206 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) |
2198 | { | 2207 | { |
2199 | int err, blks, get_blocks_flags; | 2208 | int err, blks, get_blocks_flags; |
2200 | struct ext4_map_blocks map; | 2209 | struct ext4_map_blocks map, *mapp = NULL; |
2201 | sector_t next = mpd->b_blocknr; | 2210 | sector_t next = mpd->b_blocknr; |
2202 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2211 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2203 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2212 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
2204 | handle_t *handle = NULL; | 2213 | handle_t *handle = NULL; |
2205 | 2214 | ||
2206 | /* | 2215 | /* |
2207 | * We consider only non-mapped and non-allocated blocks | 2216 | * If the blocks are mapped already, or we couldn't accumulate |
2217 | * any blocks, then proceed immediately to the submission stage. | ||
2208 | */ | 2218 | */ |
2209 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2219 | if ((mpd->b_size == 0) || |
2210 | !(mpd->b_state & (1 << BH_Delay)) && | 2220 | ((mpd->b_state & (1 << BH_Mapped)) && |
2211 | !(mpd->b_state & (1 << BH_Unwritten))) | 2221 | !(mpd->b_state & (1 << BH_Delay)) && |
2212 | return 0; | 2222 | !(mpd->b_state & (1 << BH_Unwritten)))) |
2213 | 2223 | goto submit_io; | |
2214 | /* | ||
2215 | * If we didn't accumulate anything to write simply return | ||
2216 | */ | ||
2217 | if (!mpd->b_size) | ||
2218 | return 0; | ||
2219 | 2224 | ||
2220 | handle = ext4_journal_current_handle(); | 2225 | handle = ext4_journal_current_handle(); |
2221 | BUG_ON(!handle); | 2226 | BUG_ON(!handle); |
@@ -2252,17 +2257,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2252 | 2257 | ||
2253 | err = blks; | 2258 | err = blks; |
2254 | /* | 2259 | /* |
2255 | * If get block returns with error we simply | 2260 | * If get block returns EAGAIN or ENOSPC and there |
2256 | * return. Later writepage will redirty the page and | 2261 | * appears to be free blocks we will call |
2257 | * writepages will find the dirty page again | 2262 | * ext4_writepage() for all of the pages which will |
2263 | * just redirty the pages. | ||
2258 | */ | 2264 | */ |
2259 | if (err == -EAGAIN) | 2265 | if (err == -EAGAIN) |
2260 | return 0; | 2266 | goto submit_io; |
2261 | 2267 | ||
2262 | if (err == -ENOSPC && | 2268 | if (err == -ENOSPC && |
2263 | ext4_count_free_blocks(sb)) { | 2269 | ext4_count_free_blocks(sb)) { |
2264 | mpd->retval = err; | 2270 | mpd->retval = err; |
2265 | return 0; | 2271 | goto submit_io; |
2266 | } | 2272 | } |
2267 | 2273 | ||
2268 | /* | 2274 | /* |
@@ -2287,10 +2293,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2287 | /* invalidate all the pages */ | 2293 | /* invalidate all the pages */ |
2288 | ext4_da_block_invalidatepages(mpd, next, | 2294 | ext4_da_block_invalidatepages(mpd, next, |
2289 | mpd->b_size >> mpd->inode->i_blkbits); | 2295 | mpd->b_size >> mpd->inode->i_blkbits); |
2290 | return err; | 2296 | return; |
2291 | } | 2297 | } |
2292 | BUG_ON(blks == 0); | 2298 | BUG_ON(blks == 0); |
2293 | 2299 | ||
2300 | mapp = &map; | ||
2294 | if (map.m_flags & EXT4_MAP_NEW) { | 2301 | if (map.m_flags & EXT4_MAP_NEW) { |
2295 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | 2302 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; |
2296 | int i; | 2303 | int i; |
@@ -2299,18 +2306,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2299 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 2306 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2300 | } | 2307 | } |
2301 | 2308 | ||
2302 | /* | ||
2303 | * If blocks are delayed marked, we need to | ||
2304 | * put actual blocknr and drop delayed bit | ||
2305 | */ | ||
2306 | if ((mpd->b_state & (1 << BH_Delay)) || | ||
2307 | (mpd->b_state & (1 << BH_Unwritten))) | ||
2308 | mpage_put_bnr_to_bhs(mpd, &map); | ||
2309 | |||
2310 | if (ext4_should_order_data(mpd->inode)) { | 2309 | if (ext4_should_order_data(mpd->inode)) { |
2311 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2310 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
2312 | if (err) | 2311 | if (err) |
2313 | return err; | 2312 | /* This only happens if the journal is aborted */ |
2313 | return; | ||
2314 | } | 2314 | } |
2315 | 2315 | ||
2316 | /* | 2316 | /* |
@@ -2321,10 +2321,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2321 | disksize = i_size_read(mpd->inode); | 2321 | disksize = i_size_read(mpd->inode); |
2322 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | 2322 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { |
2323 | ext4_update_i_disksize(mpd->inode, disksize); | 2323 | ext4_update_i_disksize(mpd->inode, disksize); |
2324 | return ext4_mark_inode_dirty(handle, mpd->inode); | 2324 | err = ext4_mark_inode_dirty(handle, mpd->inode); |
2325 | if (err) | ||
2326 | ext4_error(mpd->inode->i_sb, | ||
2327 | "Failed to mark inode %lu dirty", | ||
2328 | mpd->inode->i_ino); | ||
2325 | } | 2329 | } |
2326 | 2330 | ||
2327 | return 0; | 2331 | submit_io: |
2332 | mpage_da_submit_io(mpd, mapp); | ||
2333 | mpd->io_done = 1; | ||
2328 | } | 2334 | } |
2329 | 2335 | ||
2330 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 2336 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -2401,9 +2407,7 @@ flush_it: | |||
2401 | * We couldn't merge the block to our extent, so we | 2407 | * We couldn't merge the block to our extent, so we |
2402 | * need to flush current extent and start new one | 2408 | * need to flush current extent and start new one |
2403 | */ | 2409 | */ |
2404 | if (mpage_da_map_blocks(mpd) == 0) | 2410 | mpage_da_map_and_submit(mpd); |
2405 | mpage_da_submit_io(mpd); | ||
2406 | mpd->io_done = 1; | ||
2407 | return; | 2411 | return; |
2408 | } | 2412 | } |
2409 | 2413 | ||
@@ -2422,9 +2426,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2422 | * The function finds extents of pages and scan them for all blocks. | 2426 | * The function finds extents of pages and scan them for all blocks. |
2423 | */ | 2427 | */ |
2424 | static int __mpage_da_writepage(struct page *page, | 2428 | static int __mpage_da_writepage(struct page *page, |
2425 | struct writeback_control *wbc, void *data) | 2429 | struct writeback_control *wbc, |
2430 | struct mpage_da_data *mpd) | ||
2426 | { | 2431 | { |
2427 | struct mpage_da_data *mpd = data; | ||
2428 | struct inode *inode = mpd->inode; | 2432 | struct inode *inode = mpd->inode; |
2429 | struct buffer_head *bh, *head; | 2433 | struct buffer_head *bh, *head; |
2430 | sector_t logical; | 2434 | sector_t logical; |
@@ -2435,15 +2439,13 @@ static int __mpage_da_writepage(struct page *page, | |||
2435 | if (mpd->next_page != page->index) { | 2439 | if (mpd->next_page != page->index) { |
2436 | /* | 2440 | /* |
2437 | * Nope, we can't. So, we map non-allocated blocks | 2441 | * Nope, we can't. So, we map non-allocated blocks |
2438 | * and start IO on them using writepage() | 2442 | * and start IO on them |
2439 | */ | 2443 | */ |
2440 | if (mpd->next_page != mpd->first_page) { | 2444 | if (mpd->next_page != mpd->first_page) { |
2441 | if (mpage_da_map_blocks(mpd) == 0) | 2445 | mpage_da_map_and_submit(mpd); |
2442 | mpage_da_submit_io(mpd); | ||
2443 | /* | 2446 | /* |
2444 | * skip rest of the page in the page_vec | 2447 | * skip rest of the page in the page_vec |
2445 | */ | 2448 | */ |
2446 | mpd->io_done = 1; | ||
2447 | redirty_page_for_writepage(wbc, page); | 2449 | redirty_page_for_writepage(wbc, page); |
2448 | unlock_page(page); | 2450 | unlock_page(page); |
2449 | return MPAGE_DA_EXTENT_TAIL; | 2451 | return MPAGE_DA_EXTENT_TAIL; |
@@ -2550,8 +2552,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2550 | if (buffer_delay(bh)) | 2552 | if (buffer_delay(bh)) |
2551 | return 0; /* Not sure this could or should happen */ | 2553 | return 0; /* Not sure this could or should happen */ |
2552 | /* | 2554 | /* |
2553 | * XXX: __block_prepare_write() unmaps passed block, | 2555 | * XXX: __block_write_begin() unmaps passed block, is it OK? |
2554 | * is it OK? | ||
2555 | */ | 2556 | */ |
2556 | ret = ext4_da_reserve_space(inode, iblock); | 2557 | ret = ext4_da_reserve_space(inode, iblock); |
2557 | if (ret) | 2558 | if (ret) |
@@ -2583,7 +2584,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2583 | /* | 2584 | /* |
2584 | * This function is used as a standard get_block_t callback function | 2585 | * This function is used as a standard get_block_t callback function |
2585 | * when there is no desire to allocate any blocks. It is used as a | 2586 | * when there is no desire to allocate any blocks. It is used as a |
2586 | * callback function for block_prepare_write() and block_write_full_page(). | 2587 | * callback function for block_write_begin() and block_write_full_page(). |
2587 | * These functions should only try to map a single block at a time. | 2588 | * These functions should only try to map a single block at a time. |
2588 | * | 2589 | * |
2589 | * Since this function doesn't do block allocations even if the caller | 2590 | * Since this function doesn't do block allocations even if the caller |
@@ -2623,6 +2624,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2623 | int ret = 0; | 2624 | int ret = 0; |
2624 | int err; | 2625 | int err; |
2625 | 2626 | ||
2627 | ClearPageChecked(page); | ||
2626 | page_bufs = page_buffers(page); | 2628 | page_bufs = page_buffers(page); |
2627 | BUG_ON(!page_bufs); | 2629 | BUG_ON(!page_bufs); |
2628 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 2630 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
@@ -2700,7 +2702,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2700 | static int ext4_writepage(struct page *page, | 2702 | static int ext4_writepage(struct page *page, |
2701 | struct writeback_control *wbc) | 2703 | struct writeback_control *wbc) |
2702 | { | 2704 | { |
2703 | int ret = 0; | 2705 | int ret = 0, commit_write = 0; |
2704 | loff_t size; | 2706 | loff_t size; |
2705 | unsigned int len; | 2707 | unsigned int len; |
2706 | struct buffer_head *page_bufs = NULL; | 2708 | struct buffer_head *page_bufs = NULL; |
@@ -2713,71 +2715,44 @@ static int ext4_writepage(struct page *page, | |||
2713 | else | 2715 | else |
2714 | len = PAGE_CACHE_SIZE; | 2716 | len = PAGE_CACHE_SIZE; |
2715 | 2717 | ||
2716 | if (page_has_buffers(page)) { | 2718 | /* |
2717 | page_bufs = page_buffers(page); | 2719 | * If the page does not have buffers (for whatever reason), |
2718 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2720 | * try to create them using __block_write_begin. If this |
2719 | ext4_bh_delay_or_unwritten)) { | 2721 | * fails, redirty the page and move on. |
2720 | /* | 2722 | */ |
2721 | * We don't want to do block allocation | 2723 | if (!page_has_buffers(page)) { |
2722 | * So redirty the page and return | 2724 | if (__block_write_begin(page, 0, len, |
2723 | * We may reach here when we do a journal commit | 2725 | noalloc_get_block_write)) { |
2724 | * via journal_submit_inode_data_buffers. | 2726 | redirty_page: |
2725 | * If we don't have mapping block we just ignore | ||
2726 | * them. We can also reach here via shrink_page_list | ||
2727 | */ | ||
2728 | redirty_page_for_writepage(wbc, page); | 2727 | redirty_page_for_writepage(wbc, page); |
2729 | unlock_page(page); | 2728 | unlock_page(page); |
2730 | return 0; | 2729 | return 0; |
2731 | } | 2730 | } |
2732 | } else { | 2731 | commit_write = 1; |
2732 | } | ||
2733 | page_bufs = page_buffers(page); | ||
2734 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2735 | ext4_bh_delay_or_unwritten)) { | ||
2733 | /* | 2736 | /* |
2734 | * The test for page_has_buffers() is subtle: | 2737 | * We don't want to do block allocation, so redirty |
2735 | * We know the page is dirty but it lost buffers. That means | 2738 | * the page and return. We may reach here when we do |
2736 | * that at some moment in time after write_begin()/write_end() | 2739 | * a journal commit via journal_submit_inode_data_buffers. |
2737 | * has been called all buffers have been clean and thus they | 2740 | * We can also reach here via shrink_page_list |
2738 | * must have been written at least once. So they are all | ||
2739 | * mapped and we can happily proceed with mapping them | ||
2740 | * and writing the page. | ||
2741 | * | ||
2742 | * Try to initialize the buffer_heads and check whether | ||
2743 | * all are mapped and non delay. We don't want to | ||
2744 | * do block allocation here. | ||
2745 | */ | 2741 | */ |
2746 | ret = block_prepare_write(page, 0, len, | 2742 | goto redirty_page; |
2747 | noalloc_get_block_write); | 2743 | } |
2748 | if (!ret) { | 2744 | if (commit_write) |
2749 | page_bufs = page_buffers(page); | ||
2750 | /* check whether all are mapped and non delay */ | ||
2751 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2752 | ext4_bh_delay_or_unwritten)) { | ||
2753 | redirty_page_for_writepage(wbc, page); | ||
2754 | unlock_page(page); | ||
2755 | return 0; | ||
2756 | } | ||
2757 | } else { | ||
2758 | /* | ||
2759 | * We can't do block allocation here | ||
2760 | * so just redity the page and unlock | ||
2761 | * and return | ||
2762 | */ | ||
2763 | redirty_page_for_writepage(wbc, page); | ||
2764 | unlock_page(page); | ||
2765 | return 0; | ||
2766 | } | ||
2767 | /* now mark the buffer_heads as dirty and uptodate */ | 2745 | /* now mark the buffer_heads as dirty and uptodate */ |
2768 | block_commit_write(page, 0, len); | 2746 | block_commit_write(page, 0, len); |
2769 | } | ||
2770 | 2747 | ||
2771 | if (PageChecked(page) && ext4_should_journal_data(inode)) { | 2748 | if (PageChecked(page) && ext4_should_journal_data(inode)) |
2772 | /* | 2749 | /* |
2773 | * It's mmapped pagecache. Add buffers and journal it. There | 2750 | * It's mmapped pagecache. Add buffers and journal it. There |
2774 | * doesn't seem much point in redirtying the page here. | 2751 | * doesn't seem much point in redirtying the page here. |
2775 | */ | 2752 | */ |
2776 | ClearPageChecked(page); | ||
2777 | return __ext4_journalled_writepage(page, len); | 2753 | return __ext4_journalled_writepage(page, len); |
2778 | } | ||
2779 | 2754 | ||
2780 | if (page_bufs && buffer_uninit(page_bufs)) { | 2755 | if (buffer_uninit(page_bufs)) { |
2781 | ext4_set_bh_endio(page_bufs, inode); | 2756 | ext4_set_bh_endio(page_bufs, inode); |
2782 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2757 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2783 | wbc, ext4_end_io_buffer_write); | 2758 | wbc, ext4_end_io_buffer_write); |
@@ -2824,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2824 | */ | 2799 | */ |
2825 | static int write_cache_pages_da(struct address_space *mapping, | 2800 | static int write_cache_pages_da(struct address_space *mapping, |
2826 | struct writeback_control *wbc, | 2801 | struct writeback_control *wbc, |
2827 | struct mpage_da_data *mpd) | 2802 | struct mpage_da_data *mpd, |
2803 | pgoff_t *done_index) | ||
2828 | { | 2804 | { |
2829 | int ret = 0; | 2805 | int ret = 0; |
2830 | int done = 0; | 2806 | int done = 0; |
2831 | struct pagevec pvec; | 2807 | struct pagevec pvec; |
2832 | int nr_pages; | 2808 | unsigned nr_pages; |
2833 | pgoff_t index; | 2809 | pgoff_t index; |
2834 | pgoff_t end; /* Inclusive */ | 2810 | pgoff_t end; /* Inclusive */ |
2835 | long nr_to_write = wbc->nr_to_write; | 2811 | long nr_to_write = wbc->nr_to_write; |
2812 | int tag; | ||
2836 | 2813 | ||
2837 | pagevec_init(&pvec, 0); | 2814 | pagevec_init(&pvec, 0); |
2838 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2815 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2839 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2816 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2840 | 2817 | ||
2818 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2819 | tag = PAGECACHE_TAG_TOWRITE; | ||
2820 | else | ||
2821 | tag = PAGECACHE_TAG_DIRTY; | ||
2822 | |||
2823 | *done_index = index; | ||
2841 | while (!done && (index <= end)) { | 2824 | while (!done && (index <= end)) { |
2842 | int i; | 2825 | int i; |
2843 | 2826 | ||
2844 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2827 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2845 | PAGECACHE_TAG_DIRTY, | ||
2846 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2828 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2847 | if (nr_pages == 0) | 2829 | if (nr_pages == 0) |
2848 | break; | 2830 | break; |
@@ -2862,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2862 | break; | 2844 | break; |
2863 | } | 2845 | } |
2864 | 2846 | ||
2847 | *done_index = page->index + 1; | ||
2848 | |||
2865 | lock_page(page); | 2849 | lock_page(page); |
2866 | 2850 | ||
2867 | /* | 2851 | /* |
@@ -2947,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2947 | long desired_nr_to_write, nr_to_writebump = 0; | 2931 | long desired_nr_to_write, nr_to_writebump = 0; |
2948 | loff_t range_start = wbc->range_start; | 2932 | loff_t range_start = wbc->range_start; |
2949 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2933 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2934 | pgoff_t done_index = 0; | ||
2935 | pgoff_t end; | ||
2950 | 2936 | ||
2951 | trace_ext4_da_writepages(inode, wbc); | 2937 | trace_ext4_da_writepages(inode, wbc); |
2952 | 2938 | ||
@@ -2982,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2982 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2968 | wbc->range_start = index << PAGE_CACHE_SHIFT; |
2983 | wbc->range_end = LLONG_MAX; | 2969 | wbc->range_end = LLONG_MAX; |
2984 | wbc->range_cyclic = 0; | 2970 | wbc->range_cyclic = 0; |
2985 | } else | 2971 | end = -1; |
2972 | } else { | ||
2986 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2973 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2974 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2975 | } | ||
2987 | 2976 | ||
2988 | /* | 2977 | /* |
2989 | * This works around two forms of stupidity. The first is in | 2978 | * This works around two forms of stupidity. The first is in |
@@ -3002,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3002 | * sbi->max_writeback_mb_bump whichever is smaller. | 2991 | * sbi->max_writeback_mb_bump whichever is smaller. |
3003 | */ | 2992 | */ |
3004 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | 2993 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); |
3005 | if (!range_cyclic && range_whole) | 2994 | if (!range_cyclic && range_whole) { |
3006 | desired_nr_to_write = wbc->nr_to_write * 8; | 2995 | if (wbc->nr_to_write == LONG_MAX) |
3007 | else | 2996 | desired_nr_to_write = wbc->nr_to_write; |
2997 | else | ||
2998 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2999 | } else | ||
3008 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | 3000 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, |
3009 | max_pages); | 3001 | max_pages); |
3010 | if (desired_nr_to_write > max_pages) | 3002 | if (desired_nr_to_write > max_pages) |
@@ -3021,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3021 | pages_skipped = wbc->pages_skipped; | 3013 | pages_skipped = wbc->pages_skipped; |
3022 | 3014 | ||
3023 | retry: | 3015 | retry: |
3016 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
3017 | tag_pages_for_writeback(mapping, index, end); | ||
3018 | |||
3024 | while (!ret && wbc->nr_to_write > 0) { | 3019 | while (!ret && wbc->nr_to_write > 0) { |
3025 | 3020 | ||
3026 | /* | 3021 | /* |
@@ -3059,16 +3054,14 @@ retry: | |||
3059 | mpd.io_done = 0; | 3054 | mpd.io_done = 0; |
3060 | mpd.pages_written = 0; | 3055 | mpd.pages_written = 0; |
3061 | mpd.retval = 0; | 3056 | mpd.retval = 0; |
3062 | ret = write_cache_pages_da(mapping, wbc, &mpd); | 3057 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3063 | /* | 3058 | /* |
3064 | * If we have a contiguous extent of pages and we | 3059 | * If we have a contiguous extent of pages and we |
3065 | * haven't done the I/O yet, map the blocks and submit | 3060 | * haven't done the I/O yet, map the blocks and submit |
3066 | * them for I/O. | 3061 | * them for I/O. |
3067 | */ | 3062 | */ |
3068 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 3063 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
3069 | if (mpage_da_map_blocks(&mpd) == 0) | 3064 | mpage_da_map_and_submit(&mpd); |
3070 | mpage_da_submit_io(&mpd); | ||
3071 | mpd.io_done = 1; | ||
3072 | ret = MPAGE_DA_EXTENT_TAIL; | 3065 | ret = MPAGE_DA_EXTENT_TAIL; |
3073 | } | 3066 | } |
3074 | trace_ext4_da_write_pages(inode, &mpd); | 3067 | trace_ext4_da_write_pages(inode, &mpd); |
@@ -3115,14 +3108,13 @@ retry: | |||
3115 | __func__, wbc->nr_to_write, ret); | 3108 | __func__, wbc->nr_to_write, ret); |
3116 | 3109 | ||
3117 | /* Update index */ | 3110 | /* Update index */ |
3118 | index += pages_written; | ||
3119 | wbc->range_cyclic = range_cyclic; | 3111 | wbc->range_cyclic = range_cyclic; |
3120 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 3112 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
3121 | /* | 3113 | /* |
3122 | * set the writeback_index so that range_cyclic | 3114 | * set the writeback_index so that range_cyclic |
3123 | * mode will write it back later | 3115 | * mode will write it back later |
3124 | */ | 3116 | */ |
3125 | mapping->writeback_index = index; | 3117 | mapping->writeback_index = done_index; |
3126 | 3118 | ||
3127 | out_writepages: | 3119 | out_writepages: |
3128 | wbc->nr_to_write -= nr_to_writebump; | 3120 | wbc->nr_to_write -= nr_to_writebump; |
@@ -3457,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3457 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3449 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3458 | } | 3450 | } |
3459 | 3451 | ||
3460 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3461 | { | ||
3462 | BUG_ON(!io); | ||
3463 | if (io->page) | ||
3464 | put_page(io->page); | ||
3465 | iput(io->inode); | ||
3466 | kfree(io); | ||
3467 | } | ||
3468 | |||
3469 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3452 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3470 | { | 3453 | { |
3471 | struct buffer_head *head, *bh; | 3454 | struct buffer_head *head, *bh; |
@@ -3642,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
3642 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 3625 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3643 | } | 3626 | } |
3644 | 3627 | ||
3645 | static void dump_completed_IO(struct inode * inode) | ||
3646 | { | ||
3647 | #ifdef EXT4_DEBUG | ||
3648 | struct list_head *cur, *before, *after; | ||
3649 | ext4_io_end_t *io, *io0, *io1; | ||
3650 | unsigned long flags; | ||
3651 | |||
3652 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
3653 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
3654 | return; | ||
3655 | } | ||
3656 | |||
3657 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
3658 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3659 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3660 | cur = &io->list; | ||
3661 | before = cur->prev; | ||
3662 | io0 = container_of(before, ext4_io_end_t, list); | ||
3663 | after = cur->next; | ||
3664 | io1 = container_of(after, ext4_io_end_t, list); | ||
3665 | |||
3666 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
3667 | io, inode->i_ino, io0, io1); | ||
3668 | } | ||
3669 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3670 | #endif | ||
3671 | } | ||
3672 | |||
3673 | /* | ||
3674 | * check a range of space and convert unwritten extents to written. | ||
3675 | */ | ||
3676 | static int ext4_end_io_nolock(ext4_io_end_t *io) | ||
3677 | { | ||
3678 | struct inode *inode = io->inode; | ||
3679 | loff_t offset = io->offset; | ||
3680 | ssize_t size = io->size; | ||
3681 | int ret = 0; | ||
3682 | |||
3683 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
3684 | "list->prev 0x%p\n", | ||
3685 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3686 | |||
3687 | if (list_empty(&io->list)) | ||
3688 | return ret; | ||
3689 | |||
3690 | if (io->flag != EXT4_IO_UNWRITTEN) | ||
3691 | return ret; | ||
3692 | |||
3693 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3694 | if (ret < 0) { | ||
3695 | printk(KERN_EMERG "%s: failed to convert unwritten" | ||
3696 | "extents to written extents, error is %d" | ||
3697 | " io is still on inode %lu aio dio list\n", | ||
3698 | __func__, ret, inode->i_ino); | ||
3699 | return ret; | ||
3700 | } | ||
3701 | |||
3702 | if (io->iocb) | ||
3703 | aio_complete(io->iocb, io->result, 0); | ||
3704 | /* clear the DIO AIO unwritten flag */ | ||
3705 | io->flag = 0; | ||
3706 | return ret; | ||
3707 | } | ||
3708 | |||
3709 | /* | ||
3710 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
3711 | */ | ||
3712 | static void ext4_end_io_work(struct work_struct *work) | ||
3713 | { | ||
3714 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3715 | struct inode *inode = io->inode; | ||
3716 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3717 | unsigned long flags; | ||
3718 | int ret; | ||
3719 | |||
3720 | mutex_lock(&inode->i_mutex); | ||
3721 | ret = ext4_end_io_nolock(io); | ||
3722 | if (ret < 0) { | ||
3723 | mutex_unlock(&inode->i_mutex); | ||
3724 | return; | ||
3725 | } | ||
3726 | |||
3727 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3728 | if (!list_empty(&io->list)) | ||
3729 | list_del_init(&io->list); | ||
3730 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3731 | mutex_unlock(&inode->i_mutex); | ||
3732 | ext4_free_io_end(io); | ||
3733 | } | ||
3734 | |||
3735 | /* | ||
3736 | * This function is called from ext4_sync_file(). | ||
3737 | * | ||
3738 | * When IO is completed, the work to convert unwritten extents to | ||
3739 | * written is queued on workqueue but may not get immediately | ||
3740 | * scheduled. When fsync is called, we need to ensure the | ||
3741 | * conversion is complete before fsync returns. | ||
3742 | * The inode keeps track of a list of pending/completed IO that | ||
3743 | * might needs to do the conversion. This function walks through | ||
3744 | * the list and convert the related unwritten extents for completed IO | ||
3745 | * to written. | ||
3746 | * The function return the number of pending IOs on success. | ||
3747 | */ | ||
3748 | int flush_completed_IO(struct inode *inode) | ||
3749 | { | ||
3750 | ext4_io_end_t *io; | ||
3751 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3752 | unsigned long flags; | ||
3753 | int ret = 0; | ||
3754 | int ret2 = 0; | ||
3755 | |||
3756 | if (list_empty(&ei->i_completed_io_list)) | ||
3757 | return ret; | ||
3758 | |||
3759 | dump_completed_IO(inode); | ||
3760 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3761 | while (!list_empty(&ei->i_completed_io_list)){ | ||
3762 | io = list_entry(ei->i_completed_io_list.next, | ||
3763 | ext4_io_end_t, list); | ||
3764 | /* | ||
3765 | * Calling ext4_end_io_nolock() to convert completed | ||
3766 | * IO to written. | ||
3767 | * | ||
3768 | * When ext4_sync_file() is called, run_queue() may already | ||
3769 | * about to flush the work corresponding to this io structure. | ||
3770 | * It will be upset if it founds the io structure related | ||
3771 | * to the work-to-be schedule is freed. | ||
3772 | * | ||
3773 | * Thus we need to keep the io structure still valid here after | ||
3774 | * convertion finished. The io structure has a flag to | ||
3775 | * avoid double converting from both fsync and background work | ||
3776 | * queue work. | ||
3777 | */ | ||
3778 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3779 | ret = ext4_end_io_nolock(io); | ||
3780 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3781 | if (ret < 0) | ||
3782 | ret2 = ret; | ||
3783 | else | ||
3784 | list_del_init(&io->list); | ||
3785 | } | ||
3786 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3787 | return (ret2 < 0) ? ret2 : 0; | ||
3788 | } | ||
3789 | |||
3790 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | ||
3791 | { | ||
3792 | ext4_io_end_t *io = NULL; | ||
3793 | |||
3794 | io = kmalloc(sizeof(*io), flags); | ||
3795 | |||
3796 | if (io) { | ||
3797 | igrab(inode); | ||
3798 | io->inode = inode; | ||
3799 | io->flag = 0; | ||
3800 | io->offset = 0; | ||
3801 | io->size = 0; | ||
3802 | io->page = NULL; | ||
3803 | io->iocb = NULL; | ||
3804 | io->result = 0; | ||
3805 | INIT_WORK(&io->work, ext4_end_io_work); | ||
3806 | INIT_LIST_HEAD(&io->list); | ||
3807 | } | ||
3808 | |||
3809 | return io; | ||
3810 | } | ||
3811 | |||
3812 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3628 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3813 | ssize_t size, void *private, int ret, | 3629 | ssize_t size, void *private, int ret, |
3814 | bool is_async) | 3630 | bool is_async) |
@@ -3828,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3828 | size); | 3644 | size); |
3829 | 3645 | ||
3830 | /* if not aio dio with unwritten extents, just free io and return */ | 3646 | /* if not aio dio with unwritten extents, just free io and return */ |
3831 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3647 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
3832 | ext4_free_io_end(io_end); | 3648 | ext4_free_io_end(io_end); |
3833 | iocb->private = NULL; | 3649 | iocb->private = NULL; |
3834 | out: | 3650 | out: |
@@ -3845,14 +3661,14 @@ out: | |||
3845 | } | 3661 | } |
3846 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3662 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3847 | 3663 | ||
3848 | /* queue the work to convert unwritten extents to written */ | ||
3849 | queue_work(wq, &io_end->work); | ||
3850 | |||
3851 | /* Add the io_end to per-inode completed aio dio list*/ | 3664 | /* Add the io_end to per-inode completed aio dio list*/ |
3852 | ei = EXT4_I(io_end->inode); | 3665 | ei = EXT4_I(io_end->inode); |
3853 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 3666 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3854 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 3667 | list_add_tail(&io_end->list, &ei->i_completed_io_list); |
3855 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 3668 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3669 | |||
3670 | /* queue the work to convert unwritten extents to written */ | ||
3671 | queue_work(wq, &io_end->work); | ||
3856 | iocb->private = NULL; | 3672 | iocb->private = NULL; |
3857 | } | 3673 | } |
3858 | 3674 | ||
@@ -3873,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3873 | goto out; | 3689 | goto out; |
3874 | } | 3690 | } |
3875 | 3691 | ||
3876 | io_end->flag = EXT4_IO_UNWRITTEN; | 3692 | io_end->flag = EXT4_IO_END_UNWRITTEN; |
3877 | inode = io_end->inode; | 3693 | inode = io_end->inode; |
3878 | 3694 | ||
3879 | /* Add the io_end to per-inode completed io list*/ | 3695 | /* Add the io_end to per-inode completed io list*/ |
@@ -5464,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5464 | { | 5280 | { |
5465 | struct inode *inode = dentry->d_inode; | 5281 | struct inode *inode = dentry->d_inode; |
5466 | int error, rc = 0; | 5282 | int error, rc = 0; |
5283 | int orphan = 0; | ||
5467 | const unsigned int ia_valid = attr->ia_valid; | 5284 | const unsigned int ia_valid = attr->ia_valid; |
5468 | 5285 | ||
5469 | error = inode_change_ok(inode, attr); | 5286 | error = inode_change_ok(inode, attr); |
@@ -5519,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5519 | error = PTR_ERR(handle); | 5336 | error = PTR_ERR(handle); |
5520 | goto err_out; | 5337 | goto err_out; |
5521 | } | 5338 | } |
5522 | 5339 | if (ext4_handle_valid(handle)) { | |
5523 | error = ext4_orphan_add(handle, inode); | 5340 | error = ext4_orphan_add(handle, inode); |
5341 | orphan = 1; | ||
5342 | } | ||
5524 | EXT4_I(inode)->i_disksize = attr->ia_size; | 5343 | EXT4_I(inode)->i_disksize = attr->ia_size; |
5525 | rc = ext4_mark_inode_dirty(handle, inode); | 5344 | rc = ext4_mark_inode_dirty(handle, inode); |
5526 | if (!error) | 5345 | if (!error) |
@@ -5538,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5538 | goto err_out; | 5357 | goto err_out; |
5539 | } | 5358 | } |
5540 | ext4_orphan_del(handle, inode); | 5359 | ext4_orphan_del(handle, inode); |
5360 | orphan = 0; | ||
5541 | ext4_journal_stop(handle); | 5361 | ext4_journal_stop(handle); |
5542 | goto err_out; | 5362 | goto err_out; |
5543 | } | 5363 | } |
@@ -5560,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5560 | * If the call to ext4_truncate failed to get a transaction handle at | 5380 | * If the call to ext4_truncate failed to get a transaction handle at |
5561 | * all, we need to clean up the in-core orphan list manually. | 5381 | * all, we need to clean up the in-core orphan list manually. |
5562 | */ | 5382 | */ |
5563 | if (inode->i_nlink) | 5383 | if (orphan && inode->i_nlink) |
5564 | ext4_orphan_del(NULL, inode); | 5384 | ext4_orphan_del(NULL, inode); |
5565 | 5385 | ||
5566 | if (!rc && (ia_valid & ATTR_MODE)) | 5386 | if (!rc && (ia_valid & ATTR_MODE)) |
@@ -5592,9 +5412,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
5592 | * will return the blocks that include the delayed allocation | 5412 | * will return the blocks that include the delayed allocation |
5593 | * blocks for this file. | 5413 | * blocks for this file. |
5594 | */ | 5414 | */ |
5595 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5596 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 5415 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; |
5597 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5598 | 5416 | ||
5599 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 5417 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; |
5600 | return 0; | 5418 | return 0; |
@@ -5643,7 +5461,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5643 | * | 5461 | * |
5644 | * Also account for superblock, inode, quota and xattr blocks | 5462 | * Also account for superblock, inode, quota and xattr blocks |
5645 | */ | 5463 | */ |
5646 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5464 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5647 | { | 5465 | { |
5648 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 5466 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5649 | int gdpblocks; | 5467 | int gdpblocks; |
@@ -5831,6 +5649,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5831 | int err, ret; | 5649 | int err, ret; |
5832 | 5650 | ||
5833 | might_sleep(); | 5651 | might_sleep(); |
5652 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); | ||
5834 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5653 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5835 | if (ext4_handle_valid(handle) && | 5654 | if (ext4_handle_valid(handle) && |
5836 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5655 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bf5ae883b1bd..eb3bc2fe647e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -331,6 +331,30 @@ mext_out: | |||
331 | return err; | 331 | return err; |
332 | } | 332 | } |
333 | 333 | ||
334 | case FITRIM: | ||
335 | { | ||
336 | struct super_block *sb = inode->i_sb; | ||
337 | struct fstrim_range range; | ||
338 | int ret = 0; | ||
339 | |||
340 | if (!capable(CAP_SYS_ADMIN)) | ||
341 | return -EPERM; | ||
342 | |||
343 | if (copy_from_user(&range, (struct fstrim_range *)arg, | ||
344 | sizeof(range))) | ||
345 | return -EFAULT; | ||
346 | |||
347 | ret = ext4_trim_fs(sb, &range); | ||
348 | if (ret < 0) | ||
349 | return ret; | ||
350 | |||
351 | if (copy_to_user((struct fstrim_range *)arg, &range, | ||
352 | sizeof(range))) | ||
353 | return -EFAULT; | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
334 | default: | 358 | default: |
335 | return -ENOTTY; | 359 | return -ENOTTY; |
336 | } | 360 | } |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 19aa0d44d822..5b4d4e3a4d58 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -338,6 +338,14 @@ | |||
338 | static struct kmem_cache *ext4_pspace_cachep; | 338 | static struct kmem_cache *ext4_pspace_cachep; |
339 | static struct kmem_cache *ext4_ac_cachep; | 339 | static struct kmem_cache *ext4_ac_cachep; |
340 | static struct kmem_cache *ext4_free_ext_cachep; | 340 | static struct kmem_cache *ext4_free_ext_cachep; |
341 | |||
342 | /* We create slab caches for groupinfo data structures based on the | ||
343 | * superblock block size. There will be one per mounted filesystem for | ||
344 | * each unique s_blocksize_bits */ | ||
345 | #define NR_GRPINFO_CACHES \ | ||
346 | (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1) | ||
347 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; | ||
348 | |||
341 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 349 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
342 | ext4_group_t group); | 350 | ext4_group_t group); |
343 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 351 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
@@ -939,6 +947,85 @@ out: | |||
939 | } | 947 | } |
940 | 948 | ||
941 | /* | 949 | /* |
950 | * lock the group_info alloc_sem of all the groups | ||
951 | * belonging to the same buddy cache page. This | ||
952 | * make sure other parallel operation on the buddy | ||
953 | * cache doesn't happen whild holding the buddy cache | ||
954 | * lock | ||
955 | */ | ||
956 | static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | ||
957 | ext4_group_t group) | ||
958 | { | ||
959 | int i; | ||
960 | int block, pnum; | ||
961 | int blocks_per_page; | ||
962 | int groups_per_page; | ||
963 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
964 | ext4_group_t first_group; | ||
965 | struct ext4_group_info *grp; | ||
966 | |||
967 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
968 | /* | ||
969 | * the buddy cache inode stores the block bitmap | ||
970 | * and buddy information in consecutive blocks. | ||
971 | * So for each group we need two blocks. | ||
972 | */ | ||
973 | block = group * 2; | ||
974 | pnum = block / blocks_per_page; | ||
975 | first_group = pnum * blocks_per_page / 2; | ||
976 | |||
977 | groups_per_page = blocks_per_page >> 1; | ||
978 | if (groups_per_page == 0) | ||
979 | groups_per_page = 1; | ||
980 | /* read all groups the page covers into the cache */ | ||
981 | for (i = 0; i < groups_per_page; i++) { | ||
982 | |||
983 | if ((first_group + i) >= ngroups) | ||
984 | break; | ||
985 | grp = ext4_get_group_info(sb, first_group + i); | ||
986 | /* take all groups write allocation | ||
987 | * semaphore. This make sure there is | ||
988 | * no block allocation going on in any | ||
989 | * of that groups | ||
990 | */ | ||
991 | down_write_nested(&grp->alloc_sem, i); | ||
992 | } | ||
993 | return i; | ||
994 | } | ||
995 | |||
996 | static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
997 | ext4_group_t group, int locked_group) | ||
998 | { | ||
999 | int i; | ||
1000 | int block, pnum; | ||
1001 | int blocks_per_page; | ||
1002 | ext4_group_t first_group; | ||
1003 | struct ext4_group_info *grp; | ||
1004 | |||
1005 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1006 | /* | ||
1007 | * the buddy cache inode stores the block bitmap | ||
1008 | * and buddy information in consecutive blocks. | ||
1009 | * So for each group we need two blocks. | ||
1010 | */ | ||
1011 | block = group * 2; | ||
1012 | pnum = block / blocks_per_page; | ||
1013 | first_group = pnum * blocks_per_page / 2; | ||
1014 | /* release locks on all the groups */ | ||
1015 | for (i = 0; i < locked_group; i++) { | ||
1016 | |||
1017 | grp = ext4_get_group_info(sb, first_group + i); | ||
1018 | /* take all groups write allocation | ||
1019 | * semaphore. This make sure there is | ||
1020 | * no block allocation going on in any | ||
1021 | * of that groups | ||
1022 | */ | ||
1023 | up_write(&grp->alloc_sem); | ||
1024 | } | ||
1025 | |||
1026 | } | ||
1027 | |||
1028 | /* | ||
942 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | 1029 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the |
943 | * block group lock of all groups for this page; do not hold the BG lock when | 1030 | * block group lock of all groups for this page; do not hold the BG lock when |
944 | * calling this routine! | 1031 | * calling this routine! |
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1915 | return 0; | 2002 | return 0; |
1916 | } | 2003 | } |
1917 | 2004 | ||
1918 | /* | ||
1919 | * lock the group_info alloc_sem of all the groups | ||
1920 | * belonging to the same buddy cache page. This | ||
1921 | * make sure other parallel operation on the buddy | ||
1922 | * cache doesn't happen whild holding the buddy cache | ||
1923 | * lock | ||
1924 | */ | ||
1925 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1926 | { | ||
1927 | int i; | ||
1928 | int block, pnum; | ||
1929 | int blocks_per_page; | ||
1930 | int groups_per_page; | ||
1931 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1932 | ext4_group_t first_group; | ||
1933 | struct ext4_group_info *grp; | ||
1934 | |||
1935 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1936 | /* | ||
1937 | * the buddy cache inode stores the block bitmap | ||
1938 | * and buddy information in consecutive blocks. | ||
1939 | * So for each group we need two blocks. | ||
1940 | */ | ||
1941 | block = group * 2; | ||
1942 | pnum = block / blocks_per_page; | ||
1943 | first_group = pnum * blocks_per_page / 2; | ||
1944 | |||
1945 | groups_per_page = blocks_per_page >> 1; | ||
1946 | if (groups_per_page == 0) | ||
1947 | groups_per_page = 1; | ||
1948 | /* read all groups the page covers into the cache */ | ||
1949 | for (i = 0; i < groups_per_page; i++) { | ||
1950 | |||
1951 | if ((first_group + i) >= ngroups) | ||
1952 | break; | ||
1953 | grp = ext4_get_group_info(sb, first_group + i); | ||
1954 | /* take all groups write allocation | ||
1955 | * semaphore. This make sure there is | ||
1956 | * no block allocation going on in any | ||
1957 | * of that groups | ||
1958 | */ | ||
1959 | down_write_nested(&grp->alloc_sem, i); | ||
1960 | } | ||
1961 | return i; | ||
1962 | } | ||
1963 | |||
1964 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1965 | ext4_group_t group, int locked_group) | ||
1966 | { | ||
1967 | int i; | ||
1968 | int block, pnum; | ||
1969 | int blocks_per_page; | ||
1970 | ext4_group_t first_group; | ||
1971 | struct ext4_group_info *grp; | ||
1972 | |||
1973 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1974 | /* | ||
1975 | * the buddy cache inode stores the block bitmap | ||
1976 | * and buddy information in consecutive blocks. | ||
1977 | * So for each group we need two blocks. | ||
1978 | */ | ||
1979 | block = group * 2; | ||
1980 | pnum = block / blocks_per_page; | ||
1981 | first_group = pnum * blocks_per_page / 2; | ||
1982 | /* release locks on all the groups */ | ||
1983 | for (i = 0; i < locked_group; i++) { | ||
1984 | |||
1985 | grp = ext4_get_group_info(sb, first_group + i); | ||
1986 | /* take all groups write allocation | ||
1987 | * semaphore. This make sure there is | ||
1988 | * no block allocation going on in any | ||
1989 | * of that groups | ||
1990 | */ | ||
1991 | up_write(&grp->alloc_sem); | ||
1992 | } | ||
1993 | |||
1994 | } | ||
1995 | |||
1996 | static noinline_for_stack int | 2005 | static noinline_for_stack int |
1997 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 2006 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1998 | { | 2007 | { |
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = { | |||
2233 | .release = seq_release, | 2242 | .release = seq_release, |
2234 | }; | 2243 | }; |
2235 | 2244 | ||
2245 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | ||
2246 | { | ||
2247 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2248 | struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index]; | ||
2249 | |||
2250 | BUG_ON(!cachep); | ||
2251 | return cachep; | ||
2252 | } | ||
2236 | 2253 | ||
2237 | /* Create and initialize ext4_group_info data for the given group. */ | 2254 | /* Create and initialize ext4_group_info data for the given group. */ |
2238 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2255 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2239 | struct ext4_group_desc *desc) | 2256 | struct ext4_group_desc *desc) |
2240 | { | 2257 | { |
2241 | int i, len; | 2258 | int i; |
2242 | int metalen = 0; | 2259 | int metalen = 0; |
2243 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2260 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2244 | struct ext4_group_info **meta_group_info; | 2261 | struct ext4_group_info **meta_group_info; |
2262 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2245 | 2263 | ||
2246 | /* | 2264 | /* |
2247 | * First check if this group is the first of a reserved block. | 2265 | * First check if this group is the first of a reserved block. |
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2261 | meta_group_info; | 2279 | meta_group_info; |
2262 | } | 2280 | } |
2263 | 2281 | ||
2264 | /* | ||
2265 | * calculate needed size. if change bb_counters size, | ||
2266 | * don't forget about ext4_mb_generate_buddy() | ||
2267 | */ | ||
2268 | len = offsetof(typeof(**meta_group_info), | ||
2269 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2270 | |||
2271 | meta_group_info = | 2282 | meta_group_info = |
2272 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2283 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2273 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2284 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2274 | 2285 | ||
2275 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | 2286 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2276 | if (meta_group_info[i] == NULL) { | 2287 | if (meta_group_info[i] == NULL) { |
2277 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | 2288 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); |
2278 | goto exit_group_info; | 2289 | goto exit_group_info; |
2279 | } | 2290 | } |
2291 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2280 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2292 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2281 | &(meta_group_info[i]->bb_state)); | 2293 | &(meta_group_info[i]->bb_state)); |
2282 | 2294 | ||
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2331 | int num_meta_group_infos_max; | 2343 | int num_meta_group_infos_max; |
2332 | int array_size; | 2344 | int array_size; |
2333 | struct ext4_group_desc *desc; | 2345 | struct ext4_group_desc *desc; |
2346 | struct kmem_cache *cachep; | ||
2334 | 2347 | ||
2335 | /* This is the number of blocks used by GDT */ | 2348 | /* This is the number of blocks used by GDT */ |
2336 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2349 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
@@ -2373,6 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2373 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); | 2386 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); |
2374 | goto err_freesgi; | 2387 | goto err_freesgi; |
2375 | } | 2388 | } |
2389 | sbi->s_buddy_cache->i_ino = get_next_ino(); | ||
2376 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2390 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2377 | for (i = 0; i < ngroups; i++) { | 2391 | for (i = 0; i < ngroups; i++) { |
2378 | desc = ext4_get_group_desc(sb, i, NULL); | 2392 | desc = ext4_get_group_desc(sb, i, NULL); |
@@ -2388,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2388 | return 0; | 2402 | return 0; |
2389 | 2403 | ||
2390 | err_freebuddy: | 2404 | err_freebuddy: |
2405 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2391 | while (i-- > 0) | 2406 | while (i-- > 0) |
2392 | kfree(ext4_get_group_info(sb, i)); | 2407 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2393 | i = num_meta_group_infos; | 2408 | i = num_meta_group_infos; |
2394 | while (i-- > 0) | 2409 | while (i-- > 0) |
2395 | kfree(sbi->s_group_info[i]); | 2410 | kfree(sbi->s_group_info[i]); |
@@ -2406,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2406 | unsigned offset; | 2421 | unsigned offset; |
2407 | unsigned max; | 2422 | unsigned max; |
2408 | int ret; | 2423 | int ret; |
2424 | int cache_index; | ||
2425 | struct kmem_cache *cachep; | ||
2426 | char *namep = NULL; | ||
2409 | 2427 | ||
2410 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); | 2428 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2411 | 2429 | ||
2412 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2430 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2413 | if (sbi->s_mb_offsets == NULL) { | 2431 | if (sbi->s_mb_offsets == NULL) { |
2414 | return -ENOMEM; | 2432 | ret = -ENOMEM; |
2433 | goto out; | ||
2415 | } | 2434 | } |
2416 | 2435 | ||
2417 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); | 2436 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2418 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2437 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2419 | if (sbi->s_mb_maxs == NULL) { | 2438 | if (sbi->s_mb_maxs == NULL) { |
2420 | kfree(sbi->s_mb_offsets); | 2439 | ret = -ENOMEM; |
2421 | return -ENOMEM; | 2440 | goto out; |
2441 | } | ||
2442 | |||
2443 | cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2444 | cachep = ext4_groupinfo_caches[cache_index]; | ||
2445 | if (!cachep) { | ||
2446 | char name[32]; | ||
2447 | int len = offsetof(struct ext4_group_info, | ||
2448 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2449 | |||
2450 | sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits); | ||
2451 | namep = kstrdup(name, GFP_KERNEL); | ||
2452 | if (!namep) { | ||
2453 | ret = -ENOMEM; | ||
2454 | goto out; | ||
2455 | } | ||
2456 | |||
2457 | /* Need to free the kmem_cache_name() when we | ||
2458 | * destroy the slab */ | ||
2459 | cachep = kmem_cache_create(namep, len, 0, | ||
2460 | SLAB_RECLAIM_ACCOUNT, NULL); | ||
2461 | if (!cachep) { | ||
2462 | ret = -ENOMEM; | ||
2463 | goto out; | ||
2464 | } | ||
2465 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2422 | } | 2466 | } |
2423 | 2467 | ||
2424 | /* order 0 is regular bitmap */ | 2468 | /* order 0 is regular bitmap */ |
@@ -2439,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2439 | /* init file for buddy data */ | 2483 | /* init file for buddy data */ |
2440 | ret = ext4_mb_init_backend(sb); | 2484 | ret = ext4_mb_init_backend(sb); |
2441 | if (ret != 0) { | 2485 | if (ret != 0) { |
2442 | kfree(sbi->s_mb_offsets); | 2486 | goto out; |
2443 | kfree(sbi->s_mb_maxs); | ||
2444 | return ret; | ||
2445 | } | 2487 | } |
2446 | 2488 | ||
2447 | spin_lock_init(&sbi->s_md_lock); | 2489 | spin_lock_init(&sbi->s_md_lock); |
@@ -2456,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2456 | 2498 | ||
2457 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2499 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2458 | if (sbi->s_locality_groups == NULL) { | 2500 | if (sbi->s_locality_groups == NULL) { |
2459 | kfree(sbi->s_mb_offsets); | 2501 | ret = -ENOMEM; |
2460 | kfree(sbi->s_mb_maxs); | 2502 | goto out; |
2461 | return -ENOMEM; | ||
2462 | } | 2503 | } |
2463 | for_each_possible_cpu(i) { | 2504 | for_each_possible_cpu(i) { |
2464 | struct ext4_locality_group *lg; | 2505 | struct ext4_locality_group *lg; |
@@ -2475,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2475 | 2516 | ||
2476 | if (sbi->s_journal) | 2517 | if (sbi->s_journal) |
2477 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2518 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2478 | return 0; | 2519 | out: |
2520 | if (ret) { | ||
2521 | kfree(sbi->s_mb_offsets); | ||
2522 | kfree(sbi->s_mb_maxs); | ||
2523 | kfree(namep); | ||
2524 | } | ||
2525 | return ret; | ||
2479 | } | 2526 | } |
2480 | 2527 | ||
2481 | /* need to called with the ext4 group lock held */ | 2528 | /* need to called with the ext4 group lock held */ |
@@ -2503,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2503 | int num_meta_group_infos; | 2550 | int num_meta_group_infos; |
2504 | struct ext4_group_info *grinfo; | 2551 | struct ext4_group_info *grinfo; |
2505 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2552 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2553 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2506 | 2554 | ||
2507 | if (sbi->s_group_info) { | 2555 | if (sbi->s_group_info) { |
2508 | for (i = 0; i < ngroups; i++) { | 2556 | for (i = 0; i < ngroups; i++) { |
@@ -2513,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2513 | ext4_lock_group(sb, i); | 2561 | ext4_lock_group(sb, i); |
2514 | ext4_mb_cleanup_pa(grinfo); | 2562 | ext4_mb_cleanup_pa(grinfo); |
2515 | ext4_unlock_group(sb, i); | 2563 | ext4_unlock_group(sb, i); |
2516 | kfree(grinfo); | 2564 | kmem_cache_free(cachep, grinfo); |
2517 | } | 2565 | } |
2518 | num_meta_group_infos = (ngroups + | 2566 | num_meta_group_infos = (ngroups + |
2519 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2567 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
@@ -2557,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2557 | return 0; | 2605 | return 0; |
2558 | } | 2606 | } |
2559 | 2607 | ||
2560 | static inline void ext4_issue_discard(struct super_block *sb, | 2608 | static inline int ext4_issue_discard(struct super_block *sb, |
2561 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2609 | ext4_group_t block_group, ext4_grpblk_t block, int count) |
2562 | { | 2610 | { |
2563 | int ret; | 2611 | int ret; |
@@ -2567,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb, | |||
2567 | trace_ext4_discard_blocks(sb, | 2615 | trace_ext4_discard_blocks(sb, |
2568 | (unsigned long long) discard_block, count); | 2616 | (unsigned long long) discard_block, count); |
2569 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); | 2617 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
2570 | if (ret == EOPNOTSUPP) { | 2618 | if (ret == -EOPNOTSUPP) { |
2571 | ext4_warning(sb, "discard not supported, disabling"); | 2619 | ext4_warning(sb, "discard not supported, disabling"); |
2572 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | 2620 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); |
2573 | } | 2621 | } |
2622 | return ret; | ||
2574 | } | 2623 | } |
2575 | 2624 | ||
2576 | /* | 2625 | /* |
@@ -2658,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void) | |||
2658 | 2707 | ||
2659 | #endif | 2708 | #endif |
2660 | 2709 | ||
2661 | int __init init_ext4_mballoc(void) | 2710 | int __init ext4_init_mballoc(void) |
2662 | { | 2711 | { |
2663 | ext4_pspace_cachep = | 2712 | ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, |
2664 | kmem_cache_create("ext4_prealloc_space", | 2713 | SLAB_RECLAIM_ACCOUNT); |
2665 | sizeof(struct ext4_prealloc_space), | ||
2666 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2667 | if (ext4_pspace_cachep == NULL) | 2714 | if (ext4_pspace_cachep == NULL) |
2668 | return -ENOMEM; | 2715 | return -ENOMEM; |
2669 | 2716 | ||
2670 | ext4_ac_cachep = | 2717 | ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, |
2671 | kmem_cache_create("ext4_alloc_context", | 2718 | SLAB_RECLAIM_ACCOUNT); |
2672 | sizeof(struct ext4_allocation_context), | ||
2673 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2674 | if (ext4_ac_cachep == NULL) { | 2719 | if (ext4_ac_cachep == NULL) { |
2675 | kmem_cache_destroy(ext4_pspace_cachep); | 2720 | kmem_cache_destroy(ext4_pspace_cachep); |
2676 | return -ENOMEM; | 2721 | return -ENOMEM; |
2677 | } | 2722 | } |
2678 | 2723 | ||
2679 | ext4_free_ext_cachep = | 2724 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, |
2680 | kmem_cache_create("ext4_free_block_extents", | 2725 | SLAB_RECLAIM_ACCOUNT); |
2681 | sizeof(struct ext4_free_data), | ||
2682 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2683 | if (ext4_free_ext_cachep == NULL) { | 2726 | if (ext4_free_ext_cachep == NULL) { |
2684 | kmem_cache_destroy(ext4_pspace_cachep); | 2727 | kmem_cache_destroy(ext4_pspace_cachep); |
2685 | kmem_cache_destroy(ext4_ac_cachep); | 2728 | kmem_cache_destroy(ext4_ac_cachep); |
@@ -2689,8 +2732,9 @@ int __init init_ext4_mballoc(void) | |||
2689 | return 0; | 2732 | return 0; |
2690 | } | 2733 | } |
2691 | 2734 | ||
2692 | void exit_ext4_mballoc(void) | 2735 | void ext4_exit_mballoc(void) |
2693 | { | 2736 | { |
2737 | int i; | ||
2694 | /* | 2738 | /* |
2695 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2739 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
2696 | * before destroying the slab cache. | 2740 | * before destroying the slab cache. |
@@ -2699,6 +2743,15 @@ void exit_ext4_mballoc(void) | |||
2699 | kmem_cache_destroy(ext4_pspace_cachep); | 2743 | kmem_cache_destroy(ext4_pspace_cachep); |
2700 | kmem_cache_destroy(ext4_ac_cachep); | 2744 | kmem_cache_destroy(ext4_ac_cachep); |
2701 | kmem_cache_destroy(ext4_free_ext_cachep); | 2745 | kmem_cache_destroy(ext4_free_ext_cachep); |
2746 | |||
2747 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2748 | struct kmem_cache *cachep = ext4_groupinfo_caches[i]; | ||
2749 | if (cachep) { | ||
2750 | char *name = (char *)kmem_cache_name(cachep); | ||
2751 | kmem_cache_destroy(cachep); | ||
2752 | kfree(name); | ||
2753 | } | ||
2754 | } | ||
2702 | ext4_remove_debugfs_entry(); | 2755 | ext4_remove_debugfs_entry(); |
2703 | } | 2756 | } |
2704 | 2757 | ||
@@ -3535,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) | |||
3535 | */ | 3588 | */ |
3536 | static noinline_for_stack int | 3589 | static noinline_for_stack int |
3537 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | 3590 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, |
3538 | struct ext4_prealloc_space *pa, | 3591 | struct ext4_prealloc_space *pa) |
3539 | struct ext4_allocation_context *ac) | ||
3540 | { | 3592 | { |
3541 | struct super_block *sb = e4b->bd_sb; | 3593 | struct super_block *sb = e4b->bd_sb; |
3542 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3594 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -3554,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3554 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3606 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3555 | end = bit + pa->pa_len; | 3607 | end = bit + pa->pa_len; |
3556 | 3608 | ||
3557 | if (ac) { | ||
3558 | ac->ac_sb = sb; | ||
3559 | ac->ac_inode = pa->pa_inode; | ||
3560 | } | ||
3561 | |||
3562 | while (bit < end) { | 3609 | while (bit < end) { |
3563 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); | 3610 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); |
3564 | if (bit >= end) | 3611 | if (bit >= end) |
@@ -3569,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3569 | (unsigned) next - bit, (unsigned) group); | 3616 | (unsigned) next - bit, (unsigned) group); |
3570 | free += next - bit; | 3617 | free += next - bit; |
3571 | 3618 | ||
3572 | if (ac) { | 3619 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3573 | ac->ac_b_ex.fe_group = group; | 3620 | trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, |
3574 | ac->ac_b_ex.fe_start = bit; | 3621 | grp_blk_start + bit, next - bit); |
3575 | ac->ac_b_ex.fe_len = next - bit; | ||
3576 | ac->ac_b_ex.fe_logical = 0; | ||
3577 | trace_ext4_mballoc_discard(ac); | ||
3578 | } | ||
3579 | |||
3580 | trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, | ||
3581 | next - bit); | ||
3582 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3622 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3583 | bit = next + 1; | 3623 | bit = next + 1; |
3584 | } | 3624 | } |
@@ -3601,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3601 | 3641 | ||
3602 | static noinline_for_stack int | 3642 | static noinline_for_stack int |
3603 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, | 3643 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, |
3604 | struct ext4_prealloc_space *pa, | 3644 | struct ext4_prealloc_space *pa) |
3605 | struct ext4_allocation_context *ac) | ||
3606 | { | 3645 | { |
3607 | struct super_block *sb = e4b->bd_sb; | 3646 | struct super_block *sb = e4b->bd_sb; |
3608 | ext4_group_t group; | 3647 | ext4_group_t group; |
3609 | ext4_grpblk_t bit; | 3648 | ext4_grpblk_t bit; |
3610 | 3649 | ||
3611 | trace_ext4_mb_release_group_pa(sb, ac, pa); | 3650 | trace_ext4_mb_release_group_pa(sb, pa); |
3612 | BUG_ON(pa->pa_deleted == 0); | 3651 | BUG_ON(pa->pa_deleted == 0); |
3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3652 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3653 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3615 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); | 3654 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
3616 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); | 3655 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
3617 | 3656 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); | |
3618 | if (ac) { | ||
3619 | ac->ac_sb = sb; | ||
3620 | ac->ac_inode = NULL; | ||
3621 | ac->ac_b_ex.fe_group = group; | ||
3622 | ac->ac_b_ex.fe_start = bit; | ||
3623 | ac->ac_b_ex.fe_len = pa->pa_len; | ||
3624 | ac->ac_b_ex.fe_logical = 0; | ||
3625 | trace_ext4_mballoc_discard(ac); | ||
3626 | } | ||
3627 | 3657 | ||
3628 | return 0; | 3658 | return 0; |
3629 | } | 3659 | } |
@@ -3644,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3644 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 3674 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
3645 | struct buffer_head *bitmap_bh = NULL; | 3675 | struct buffer_head *bitmap_bh = NULL; |
3646 | struct ext4_prealloc_space *pa, *tmp; | 3676 | struct ext4_prealloc_space *pa, *tmp; |
3647 | struct ext4_allocation_context *ac; | ||
3648 | struct list_head list; | 3677 | struct list_head list; |
3649 | struct ext4_buddy e4b; | 3678 | struct ext4_buddy e4b; |
3650 | int err; | 3679 | int err; |
@@ -3673,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3673 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3702 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; |
3674 | 3703 | ||
3675 | INIT_LIST_HEAD(&list); | 3704 | INIT_LIST_HEAD(&list); |
3676 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3677 | if (ac) | ||
3678 | ac->ac_sb = sb; | ||
3679 | repeat: | 3705 | repeat: |
3680 | ext4_lock_group(sb, group); | 3706 | ext4_lock_group(sb, group); |
3681 | list_for_each_entry_safe(pa, tmp, | 3707 | list_for_each_entry_safe(pa, tmp, |
@@ -3730,9 +3756,9 @@ repeat: | |||
3730 | spin_unlock(pa->pa_obj_lock); | 3756 | spin_unlock(pa->pa_obj_lock); |
3731 | 3757 | ||
3732 | if (pa->pa_type == MB_GROUP_PA) | 3758 | if (pa->pa_type == MB_GROUP_PA) |
3733 | ext4_mb_release_group_pa(&e4b, pa, ac); | 3759 | ext4_mb_release_group_pa(&e4b, pa); |
3734 | else | 3760 | else |
3735 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3761 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3736 | 3762 | ||
3737 | list_del(&pa->u.pa_tmp_list); | 3763 | list_del(&pa->u.pa_tmp_list); |
3738 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3764 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
@@ -3740,8 +3766,6 @@ repeat: | |||
3740 | 3766 | ||
3741 | out: | 3767 | out: |
3742 | ext4_unlock_group(sb, group); | 3768 | ext4_unlock_group(sb, group); |
3743 | if (ac) | ||
3744 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3745 | ext4_mb_unload_buddy(&e4b); | 3769 | ext4_mb_unload_buddy(&e4b); |
3746 | put_bh(bitmap_bh); | 3770 | put_bh(bitmap_bh); |
3747 | return free; | 3771 | return free; |
@@ -3762,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3762 | struct super_block *sb = inode->i_sb; | 3786 | struct super_block *sb = inode->i_sb; |
3763 | struct buffer_head *bitmap_bh = NULL; | 3787 | struct buffer_head *bitmap_bh = NULL; |
3764 | struct ext4_prealloc_space *pa, *tmp; | 3788 | struct ext4_prealloc_space *pa, *tmp; |
3765 | struct ext4_allocation_context *ac; | ||
3766 | ext4_group_t group = 0; | 3789 | ext4_group_t group = 0; |
3767 | struct list_head list; | 3790 | struct list_head list; |
3768 | struct ext4_buddy e4b; | 3791 | struct ext4_buddy e4b; |
@@ -3778,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3778 | 3801 | ||
3779 | INIT_LIST_HEAD(&list); | 3802 | INIT_LIST_HEAD(&list); |
3780 | 3803 | ||
3781 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3782 | if (ac) { | ||
3783 | ac->ac_sb = sb; | ||
3784 | ac->ac_inode = inode; | ||
3785 | } | ||
3786 | repeat: | 3804 | repeat: |
3787 | /* first, collect all pa's in the inode */ | 3805 | /* first, collect all pa's in the inode */ |
3788 | spin_lock(&ei->i_prealloc_lock); | 3806 | spin_lock(&ei->i_prealloc_lock); |
@@ -3852,7 +3870,7 @@ repeat: | |||
3852 | 3870 | ||
3853 | ext4_lock_group(sb, group); | 3871 | ext4_lock_group(sb, group); |
3854 | list_del(&pa->pa_group_list); | 3872 | list_del(&pa->pa_group_list); |
3855 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3873 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3856 | ext4_unlock_group(sb, group); | 3874 | ext4_unlock_group(sb, group); |
3857 | 3875 | ||
3858 | ext4_mb_unload_buddy(&e4b); | 3876 | ext4_mb_unload_buddy(&e4b); |
@@ -3861,8 +3879,6 @@ repeat: | |||
3861 | list_del(&pa->u.pa_tmp_list); | 3879 | list_del(&pa->u.pa_tmp_list); |
3862 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3880 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
3863 | } | 3881 | } |
3864 | if (ac) | ||
3865 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3866 | } | 3882 | } |
3867 | 3883 | ||
3868 | /* | 3884 | /* |
@@ -4060,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4060 | struct ext4_buddy e4b; | 4076 | struct ext4_buddy e4b; |
4061 | struct list_head discard_list; | 4077 | struct list_head discard_list; |
4062 | struct ext4_prealloc_space *pa, *tmp; | 4078 | struct ext4_prealloc_space *pa, *tmp; |
4063 | struct ext4_allocation_context *ac; | ||
4064 | 4079 | ||
4065 | mb_debug(1, "discard locality group preallocation\n"); | 4080 | mb_debug(1, "discard locality group preallocation\n"); |
4066 | 4081 | ||
4067 | INIT_LIST_HEAD(&discard_list); | 4082 | INIT_LIST_HEAD(&discard_list); |
4068 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4069 | if (ac) | ||
4070 | ac->ac_sb = sb; | ||
4071 | 4083 | ||
4072 | spin_lock(&lg->lg_prealloc_lock); | 4084 | spin_lock(&lg->lg_prealloc_lock); |
4073 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], | 4085 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
@@ -4119,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4119 | } | 4131 | } |
4120 | ext4_lock_group(sb, group); | 4132 | ext4_lock_group(sb, group); |
4121 | list_del(&pa->pa_group_list); | 4133 | list_del(&pa->pa_group_list); |
4122 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4134 | ext4_mb_release_group_pa(&e4b, pa); |
4123 | ext4_unlock_group(sb, group); | 4135 | ext4_unlock_group(sb, group); |
4124 | 4136 | ||
4125 | ext4_mb_unload_buddy(&e4b); | 4137 | ext4_mb_unload_buddy(&e4b); |
4126 | list_del(&pa->u.pa_tmp_list); | 4138 | list_del(&pa->u.pa_tmp_list); |
4127 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4139 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4128 | } | 4140 | } |
4129 | if (ac) | ||
4130 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4131 | } | 4141 | } |
4132 | 4142 | ||
4133 | /* | 4143 | /* |
@@ -4491,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4491 | { | 4501 | { |
4492 | struct buffer_head *bitmap_bh = NULL; | 4502 | struct buffer_head *bitmap_bh = NULL; |
4493 | struct super_block *sb = inode->i_sb; | 4503 | struct super_block *sb = inode->i_sb; |
4494 | struct ext4_allocation_context *ac = NULL; | ||
4495 | struct ext4_group_desc *gdp; | 4504 | struct ext4_group_desc *gdp; |
4496 | unsigned long freed = 0; | 4505 | unsigned long freed = 0; |
4497 | unsigned int overflow; | 4506 | unsigned int overflow; |
@@ -4531,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4531 | if (!bh) | 4540 | if (!bh) |
4532 | tbh = sb_find_get_block(inode->i_sb, | 4541 | tbh = sb_find_get_block(inode->i_sb, |
4533 | block + i); | 4542 | block + i); |
4543 | if (unlikely(!tbh)) | ||
4544 | continue; | ||
4534 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4545 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4535 | inode, tbh, block + i); | 4546 | inode, tbh, block + i); |
4536 | } | 4547 | } |
@@ -4546,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4546 | if (!ext4_should_writeback_data(inode)) | 4557 | if (!ext4_should_writeback_data(inode)) |
4547 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4558 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4548 | 4559 | ||
4549 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4550 | if (ac) { | ||
4551 | ac->ac_inode = inode; | ||
4552 | ac->ac_sb = sb; | ||
4553 | } | ||
4554 | |||
4555 | do_more: | 4560 | do_more: |
4556 | overflow = 0; | 4561 | overflow = 0; |
4557 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4562 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -4609,12 +4614,7 @@ do_more: | |||
4609 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4614 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4610 | } | 4615 | } |
4611 | #endif | 4616 | #endif |
4612 | if (ac) { | 4617 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); |
4613 | ac->ac_b_ex.fe_group = block_group; | ||
4614 | ac->ac_b_ex.fe_start = bit; | ||
4615 | ac->ac_b_ex.fe_len = count; | ||
4616 | trace_ext4_mballoc_free(ac); | ||
4617 | } | ||
4618 | 4618 | ||
4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4620 | if (err) | 4620 | if (err) |
@@ -4644,8 +4644,6 @@ do_more: | |||
4644 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4644 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4645 | mb_free_blocks(inode, &e4b, bit, count); | 4645 | mb_free_blocks(inode, &e4b, bit, count); |
4646 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4646 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
4647 | if (test_opt(sb, DISCARD)) | ||
4648 | ext4_issue_discard(sb, block_group, bit, count); | ||
4649 | } | 4647 | } |
4650 | 4648 | ||
4651 | ret = ext4_free_blks_count(sb, gdp) + count; | 4649 | ret = ext4_free_blks_count(sb, gdp) + count; |
@@ -4685,7 +4683,190 @@ error_return: | |||
4685 | dquot_free_block(inode, freed); | 4683 | dquot_free_block(inode, freed); |
4686 | brelse(bitmap_bh); | 4684 | brelse(bitmap_bh); |
4687 | ext4_std_error(sb, err); | 4685 | ext4_std_error(sb, err); |
4688 | if (ac) | ||
4689 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4690 | return; | 4686 | return; |
4691 | } | 4687 | } |
4688 | |||
4689 | /** | ||
4690 | * ext4_trim_extent -- function to TRIM one single free extent in the group | ||
4691 | * @sb: super block for the file system | ||
4692 | * @start: starting block of the free extent in the alloc. group | ||
4693 | * @count: number of blocks to TRIM | ||
4694 | * @group: alloc. group we are working with | ||
4695 | * @e4b: ext4 buddy for the group | ||
4696 | * | ||
4697 | * Trim "count" blocks starting at "start" in the "group". To assure that no | ||
4698 | * one will allocate those blocks, mark them as used in the buddy bitmap. This | ||
4699 | * must be called under the group lock. | ||
4700 | */ | ||
4701 | static int ext4_trim_extent(struct super_block *sb, int start, int count, | ||
4702 | ext4_group_t group, struct ext4_buddy *e4b) | ||
4703 | { | ||
4704 | struct ext4_free_extent ex; | ||
4705 | int ret = 0; | ||
4706 | |||
4707 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | ||
4708 | |||
4709 | ex.fe_start = start; | ||
4710 | ex.fe_group = group; | ||
4711 | ex.fe_len = count; | ||
4712 | |||
4713 | /* | ||
4714 | * Mark blocks used, so no one can reuse them while | ||
4715 | * being trimmed. | ||
4716 | */ | ||
4717 | mb_mark_used(e4b, &ex); | ||
4718 | ext4_unlock_group(sb, group); | ||
4719 | |||
4720 | ret = ext4_issue_discard(sb, group, start, count); | ||
4721 | if (ret) | ||
4722 | ext4_std_error(sb, ret); | ||
4723 | |||
4724 | ext4_lock_group(sb, group); | ||
4725 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | ||
4726 | return ret; | ||
4727 | } | ||
4728 | |||
4729 | /** | ||
4730 | * ext4_trim_all_free -- function to trim all free space in alloc. group | ||
4731 | * @sb: super block for file system | ||
4732 | * @e4b: ext4 buddy | ||
4733 | * @start: first group block to examine | ||
4734 | * @max: last group block to examine | ||
4735 | * @minblocks: minimum extent block count | ||
4736 | * | ||
4737 | * ext4_trim_all_free walks through group's buddy bitmap searching for free | ||
4738 | * extents. When the free block is found, ext4_trim_extent is called to TRIM | ||
4739 | * the extent. | ||
4740 | * | ||
4741 | * | ||
4742 | * ext4_trim_all_free walks through group's block bitmap searching for free | ||
4743 | * extents. When the free extent is found, mark it as used in group buddy | ||
4744 | * bitmap. Then issue a TRIM command on this extent and free the extent in | ||
4745 | * the group buddy bitmap. This is done until whole group is scanned. | ||
4746 | */ | ||
4747 | ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | ||
4748 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | ||
4749 | { | ||
4750 | void *bitmap; | ||
4751 | ext4_grpblk_t next, count = 0; | ||
4752 | ext4_group_t group; | ||
4753 | int ret = 0; | ||
4754 | |||
4755 | BUG_ON(e4b == NULL); | ||
4756 | |||
4757 | bitmap = e4b->bd_bitmap; | ||
4758 | group = e4b->bd_group; | ||
4759 | start = (e4b->bd_info->bb_first_free > start) ? | ||
4760 | e4b->bd_info->bb_first_free : start; | ||
4761 | ext4_lock_group(sb, group); | ||
4762 | |||
4763 | while (start < max) { | ||
4764 | start = mb_find_next_zero_bit(bitmap, max, start); | ||
4765 | if (start >= max) | ||
4766 | break; | ||
4767 | next = mb_find_next_bit(bitmap, max, start); | ||
4768 | |||
4769 | if ((next - start) >= minblocks) { | ||
4770 | ret = ext4_trim_extent(sb, start, | ||
4771 | next - start, group, e4b); | ||
4772 | if (ret < 0) | ||
4773 | break; | ||
4774 | count += next - start; | ||
4775 | } | ||
4776 | start = next + 1; | ||
4777 | |||
4778 | if (fatal_signal_pending(current)) { | ||
4779 | count = -ERESTARTSYS; | ||
4780 | break; | ||
4781 | } | ||
4782 | |||
4783 | if (need_resched()) { | ||
4784 | ext4_unlock_group(sb, group); | ||
4785 | cond_resched(); | ||
4786 | ext4_lock_group(sb, group); | ||
4787 | } | ||
4788 | |||
4789 | if ((e4b->bd_info->bb_free - count) < minblocks) | ||
4790 | break; | ||
4791 | } | ||
4792 | ext4_unlock_group(sb, group); | ||
4793 | |||
4794 | ext4_debug("trimmed %d blocks in the group %d\n", | ||
4795 | count, group); | ||
4796 | |||
4797 | if (ret < 0) | ||
4798 | count = ret; | ||
4799 | |||
4800 | return count; | ||
4801 | } | ||
4802 | |||
4803 | /** | ||
4804 | * ext4_trim_fs() -- trim ioctl handle function | ||
4805 | * @sb: superblock for filesystem | ||
4806 | * @range: fstrim_range structure | ||
4807 | * | ||
4808 | * start: First Byte to trim | ||
4809 | * len: number of Bytes to trim from start | ||
4810 | * minlen: minimum extent length in Bytes | ||
4811 | * ext4_trim_fs goes through all allocation groups containing Bytes from | ||
4812 | * start to start+len. For each such group, the ext4_trim_all_free function | ||
4813 | * is invoked to trim all free space. | ||
4814 | */ | ||
4815 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
4816 | { | ||
4817 | struct ext4_buddy e4b; | ||
4818 | ext4_group_t first_group, last_group; | ||
4819 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
4820 | ext4_grpblk_t cnt = 0, first_block, last_block; | ||
4821 | uint64_t start, len, minlen, trimmed; | ||
4822 | int ret = 0; | ||
4823 | |||
4824 | start = range->start >> sb->s_blocksize_bits; | ||
4825 | len = range->len >> sb->s_blocksize_bits; | ||
4826 | minlen = range->minlen >> sb->s_blocksize_bits; | ||
4827 | trimmed = 0; | ||
4828 | |||
4829 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | ||
4830 | return -EINVAL; | ||
4831 | |||
4832 | /* Determine first and last group to examine based on start and len */ | ||
4833 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | ||
4834 | &first_group, &first_block); | ||
4835 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | ||
4836 | &last_group, &last_block); | ||
4837 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
4838 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | ||
4839 | |||
4840 | if (first_group > last_group) | ||
4841 | return -EINVAL; | ||
4842 | |||
4843 | for (group = first_group; group <= last_group; group++) { | ||
4844 | ret = ext4_mb_load_buddy(sb, group, &e4b); | ||
4845 | if (ret) { | ||
4846 | ext4_error(sb, "Error in loading buddy " | ||
4847 | "information for %u", group); | ||
4848 | break; | ||
4849 | } | ||
4850 | |||
4851 | if (len >= EXT4_BLOCKS_PER_GROUP(sb)) | ||
4852 | len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); | ||
4853 | else | ||
4854 | last_block = len; | ||
4855 | |||
4856 | if (e4b.bd_info->bb_free >= minlen) { | ||
4857 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | ||
4858 | last_block, minlen); | ||
4859 | if (cnt < 0) { | ||
4860 | ret = cnt; | ||
4861 | ext4_mb_unload_buddy(&e4b); | ||
4862 | break; | ||
4863 | } | ||
4864 | } | ||
4865 | ext4_mb_unload_buddy(&e4b); | ||
4866 | trimmed += cnt; | ||
4867 | first_block = 0; | ||
4868 | } | ||
4869 | range->len = trimmed * sb->s_blocksize; | ||
4870 | |||
4871 | return ret; | ||
4872 | } | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 1765c2c50a9b..25f3a974b725 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
412 | struct buffer_head *bh; | 412 | struct buffer_head *bh; |
413 | struct ext4_extent_header *eh; | 413 | struct ext4_extent_header *eh; |
414 | 414 | ||
415 | block = idx_pblock(ix); | 415 | block = ext4_idx_pblock(ix); |
416 | bh = sb_bread(inode->i_sb, block); | 416 | bh = sb_bread(inode->i_sb, block); |
417 | if (!bh) | 417 | if (!bh) |
418 | return -EIO; | 418 | return -EIO; |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 5f1ed9fc913c..b9f3e7862f13 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | 85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { |
86 | /* leaf block */ | 86 | /* leaf block */ |
87 | *extent = ++path[ppos].p_ext; | 87 | *extent = ++path[ppos].p_ext; |
88 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 88 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
96 | 96 | ||
97 | /* index block */ | 97 | /* index block */ |
98 | path[ppos].p_idx++; | 98 | path[ppos].p_idx++; |
99 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 99 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
100 | if (path[ppos+1].p_bh) | 100 | if (path[ppos+1].p_bh) |
101 | brelse(path[ppos+1].p_bh); | 101 | brelse(path[ppos+1].p_bh); |
102 | path[ppos+1].p_bh = | 102 | path[ppos+1].p_bh = |
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
111 | path[cur_ppos].p_idx = | 111 | path[cur_ppos].p_idx = |
112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); | 112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); |
113 | path[cur_ppos].p_block = | 113 | path[cur_ppos].p_block = |
114 | idx_pblock(path[cur_ppos].p_idx); | 114 | ext4_idx_pblock(path[cur_ppos].p_idx); |
115 | if (path[cur_ppos+1].p_bh) | 115 | if (path[cur_ppos+1].p_bh) |
116 | brelse(path[cur_ppos+1].p_bh); | 116 | brelse(path[cur_ppos+1].p_bh); |
117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, | 117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, |
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
133 | path[leaf_ppos].p_ext = *extent = | 133 | path[leaf_ppos].p_ext = *extent = |
134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | 134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); |
135 | path[leaf_ppos].p_block = | 135 | path[leaf_ppos].p_block = |
136 | ext_pblock(path[leaf_ppos].p_ext); | 136 | ext4_ext_pblock(path[leaf_ppos].p_ext); |
137 | return 0; | 137 | return 0; |
138 | } | 138 | } |
139 | } | 139 | } |
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
249 | */ | 249 | */ |
250 | o_end->ee_block = end_ext->ee_block; | 250 | o_end->ee_block = end_ext->ee_block; |
251 | o_end->ee_len = end_ext->ee_len; | 251 | o_end->ee_len = end_ext->ee_len; |
252 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 252 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
253 | } | 253 | } |
254 | 254 | ||
255 | o_start->ee_len = start_ext->ee_len; | 255 | o_start->ee_len = start_ext->ee_len; |
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
276 | */ | 276 | */ |
277 | o_end->ee_block = end_ext->ee_block; | 277 | o_end->ee_block = end_ext->ee_block; |
278 | o_end->ee_len = end_ext->ee_len; | 278 | o_end->ee_len = end_ext->ee_len; |
279 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 279 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
280 | 280 | ||
281 | /* | 281 | /* |
282 | * Set 0 to the extent block if new_ext was | 282 | * Set 0 to the extent block if new_ext was |
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start, | |||
361 | /* Insert new entry */ | 361 | /* Insert new entry */ |
362 | if (new_ext->ee_len) { | 362 | if (new_ext->ee_len) { |
363 | o_start[i] = *new_ext; | 363 | o_start[i] = *new_ext; |
364 | ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); | 364 | ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); |
365 | } | 365 | } |
366 | 366 | ||
367 | /* Insert end entry */ | 367 | /* Insert end entry */ |
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
488 | start_ext.ee_len = end_ext.ee_len = 0; | 488 | start_ext.ee_len = end_ext.ee_len = 0; |
489 | 489 | ||
490 | new_ext.ee_block = cpu_to_le32(*from); | 490 | new_ext.ee_block = cpu_to_le32(*from); |
491 | ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); | 491 | ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); |
492 | new_ext.ee_len = dext->ee_len; | 492 | new_ext.ee_len = dext->ee_len; |
493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | 493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); |
494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | 494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; |
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
553 | copy_extent_status(oext, &end_ext); | 553 | copy_extent_status(oext, &end_ext); |
554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); | 554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); |
555 | ext4_ext_store_pblock(&end_ext, | 555 | ext4_ext_store_pblock(&end_ext, |
556 | (ext_pblock(o_end) + oext_alen - end_ext_alen)); | 556 | (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); |
557 | end_ext.ee_block = | 557 | end_ext.ee_block = |
558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + | 558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + |
559 | oext_alen - end_ext_alen); | 559 | oext_alen - end_ext_alen); |
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
604 | /* When tmp_dext is too large, pick up the target range. */ | 604 | /* When tmp_dext is too large, pick up the target range. */ |
605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | 605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); |
606 | 606 | ||
607 | ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); | 607 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); |
608 | tmp_dext->ee_block = | 608 | tmp_dext->ee_block = |
609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); | 609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); |
610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); | 610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); |
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
613 | tmp_dext->ee_len = cpu_to_le16(max_count); | 613 | tmp_dext->ee_len = cpu_to_le16(max_count); |
614 | 614 | ||
615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); | 615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); |
616 | ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); | 616 | ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); |
617 | 617 | ||
618 | /* Adjust extent length if donor extent is larger than orig */ | 618 | /* Adjust extent length if donor extent is larger than orig */ |
619 | if (ext4_ext_get_actual_len(tmp_dext) > | 619 | if (ext4_ext_get_actual_len(tmp_dext) > |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 314c0d3b3fa9..92203b8a099f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
856 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; | 856 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
857 | struct buffer_head *bh, *ret = NULL; | 857 | struct buffer_head *bh, *ret = NULL; |
858 | ext4_lblk_t start, block, b; | 858 | ext4_lblk_t start, block, b; |
859 | const u8 *name = d_name->name; | ||
859 | int ra_max = 0; /* Number of bh's in the readahead | 860 | int ra_max = 0; /* Number of bh's in the readahead |
860 | buffer, bh_use[] */ | 861 | buffer, bh_use[] */ |
861 | int ra_ptr = 0; /* Current index into readahead | 862 | int ra_ptr = 0; /* Current index into readahead |
@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
870 | namelen = d_name->len; | 871 | namelen = d_name->len; |
871 | if (namelen > EXT4_NAME_LEN) | 872 | if (namelen > EXT4_NAME_LEN) |
872 | return NULL; | 873 | return NULL; |
874 | if ((namelen <= 2) && (name[0] == '.') && | ||
875 | (name[1] == '.' || name[1] == '0')) { | ||
876 | /* | ||
877 | * "." or ".." will only be in the first block | ||
878 | * NFS may look up ".."; "." should be handled by the VFS | ||
879 | */ | ||
880 | block = start = 0; | ||
881 | nblocks = 1; | ||
882 | goto restart; | ||
883 | } | ||
873 | if (is_dx(dir)) { | 884 | if (is_dx(dir)) { |
874 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); | 885 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
875 | /* | 886 | /* |
@@ -960,55 +971,35 @@ cleanup_and_exit: | |||
960 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, | 971 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
961 | struct ext4_dir_entry_2 **res_dir, int *err) | 972 | struct ext4_dir_entry_2 **res_dir, int *err) |
962 | { | 973 | { |
963 | struct super_block * sb; | 974 | struct super_block * sb = dir->i_sb; |
964 | struct dx_hash_info hinfo; | 975 | struct dx_hash_info hinfo; |
965 | u32 hash; | ||
966 | struct dx_frame frames[2], *frame; | 976 | struct dx_frame frames[2], *frame; |
967 | struct ext4_dir_entry_2 *de, *top; | ||
968 | struct buffer_head *bh; | 977 | struct buffer_head *bh; |
969 | ext4_lblk_t block; | 978 | ext4_lblk_t block; |
970 | int retval; | 979 | int retval; |
971 | int namelen = d_name->len; | ||
972 | const u8 *name = d_name->name; | ||
973 | 980 | ||
974 | sb = dir->i_sb; | 981 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
975 | /* NFS may look up ".." - look at dx_root directory block */ | 982 | return NULL; |
976 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | ||
977 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) | ||
978 | return NULL; | ||
979 | } else { | ||
980 | frame = frames; | ||
981 | frame->bh = NULL; /* for dx_release() */ | ||
982 | frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ | ||
983 | dx_set_block(frame->at, 0); /* dx_root block is 0 */ | ||
984 | } | ||
985 | hash = hinfo.hash; | ||
986 | do { | 983 | do { |
987 | block = dx_get_block(frame->at); | 984 | block = dx_get_block(frame->at); |
988 | if (!(bh = ext4_bread (NULL,dir, block, 0, err))) | 985 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) |
989 | goto errout; | 986 | goto errout; |
990 | de = (struct ext4_dir_entry_2 *) bh->b_data; | ||
991 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - | ||
992 | EXT4_DIR_REC_LEN(0)); | ||
993 | for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) { | ||
994 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) | ||
995 | + ((char *) de - bh->b_data); | ||
996 | |||
997 | if (!ext4_check_dir_entry(dir, de, bh, off)) { | ||
998 | brelse(bh); | ||
999 | *err = ERR_BAD_DX_DIR; | ||
1000 | goto errout; | ||
1001 | } | ||
1002 | 987 | ||
1003 | if (ext4_match(namelen, name, de)) { | 988 | retval = search_dirblock(bh, dir, d_name, |
1004 | *res_dir = de; | 989 | block << EXT4_BLOCK_SIZE_BITS(sb), |
1005 | dx_release(frames); | 990 | res_dir); |
1006 | return bh; | 991 | if (retval == 1) { /* Success! */ |
1007 | } | 992 | dx_release(frames); |
993 | return bh; | ||
1008 | } | 994 | } |
1009 | brelse(bh); | 995 | brelse(bh); |
996 | if (retval == -1) { | ||
997 | *err = ERR_BAD_DX_DIR; | ||
998 | goto errout; | ||
999 | } | ||
1000 | |||
1010 | /* Check to see if we should continue to search */ | 1001 | /* Check to see if we should continue to search */ |
1011 | retval = ext4_htree_next_block(dir, hash, frame, | 1002 | retval = ext4_htree_next_block(dir, hinfo.hash, frame, |
1012 | frames, NULL); | 1003 | frames, NULL); |
1013 | if (retval < 0) { | 1004 | if (retval < 0) { |
1014 | ext4_warning(sb, | 1005 | ext4_warning(sb, |
@@ -2312,7 +2303,7 @@ retry: | |||
2312 | 2303 | ||
2313 | inode->i_ctime = ext4_current_time(inode); | 2304 | inode->i_ctime = ext4_current_time(inode); |
2314 | ext4_inc_count(handle, inode); | 2305 | ext4_inc_count(handle, inode); |
2315 | atomic_inc(&inode->i_count); | 2306 | ihold(inode); |
2316 | 2307 | ||
2317 | err = ext4_add_entry(handle, dentry, inode); | 2308 | err = ext4_add_entry(handle, dentry, inode); |
2318 | if (!err) { | 2309 | if (!err) { |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c new file mode 100644 index 000000000000..beacce11ac50 --- /dev/null +++ b/fs/ext4/page-io.c | |||
@@ -0,0 +1,431 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/page-io.c | ||
3 | * | ||
4 | * This contains the new page_io functions for ext4 | ||
5 | * | ||
6 | * Written by Theodore Ts'o, 2010. | ||
7 | */ | ||
8 | |||
9 | #include <linux/module.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/time.h> | ||
12 | #include <linux/jbd2.h> | ||
13 | #include <linux/highuid.h> | ||
14 | #include <linux/pagemap.h> | ||
15 | #include <linux/quotaops.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/buffer_head.h> | ||
18 | #include <linux/writeback.h> | ||
19 | #include <linux/pagevec.h> | ||
20 | #include <linux/mpage.h> | ||
21 | #include <linux/namei.h> | ||
22 | #include <linux/uio.h> | ||
23 | #include <linux/bio.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/slab.h> | ||
27 | |||
28 | #include "ext4_jbd2.h" | ||
29 | #include "xattr.h" | ||
30 | #include "acl.h" | ||
31 | #include "ext4_extents.h" | ||
32 | |||
33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | ||
34 | |||
35 | #define WQ_HASH_SZ 37 | ||
36 | #define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ]) | ||
37 | static wait_queue_head_t ioend_wq[WQ_HASH_SZ]; | ||
38 | |||
39 | int __init ext4_init_pageio(void) | ||
40 | { | ||
41 | int i; | ||
42 | |||
43 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | ||
44 | if (io_page_cachep == NULL) | ||
45 | return -ENOMEM; | ||
46 | io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); | ||
47 | if (io_page_cachep == NULL) { | ||
48 | kmem_cache_destroy(io_page_cachep); | ||
49 | return -ENOMEM; | ||
50 | } | ||
51 | for (i = 0; i < WQ_HASH_SZ; i++) | ||
52 | init_waitqueue_head(&ioend_wq[i]); | ||
53 | |||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | void ext4_exit_pageio(void) | ||
58 | { | ||
59 | kmem_cache_destroy(io_end_cachep); | ||
60 | kmem_cache_destroy(io_page_cachep); | ||
61 | } | ||
62 | |||
63 | void ext4_ioend_wait(struct inode *inode) | ||
64 | { | ||
65 | wait_queue_head_t *wq = to_ioend_wq(inode); | ||
66 | |||
67 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | ||
68 | } | ||
69 | |||
70 | static void put_io_page(struct ext4_io_page *io_page) | ||
71 | { | ||
72 | if (atomic_dec_and_test(&io_page->p_count)) { | ||
73 | end_page_writeback(io_page->p_page); | ||
74 | put_page(io_page->p_page); | ||
75 | kmem_cache_free(io_page_cachep, io_page); | ||
76 | } | ||
77 | } | ||
78 | |||
79 | void ext4_free_io_end(ext4_io_end_t *io) | ||
80 | { | ||
81 | int i; | ||
82 | wait_queue_head_t *wq; | ||
83 | |||
84 | BUG_ON(!io); | ||
85 | if (io->page) | ||
86 | put_page(io->page); | ||
87 | for (i = 0; i < io->num_io_pages; i++) | ||
88 | put_io_page(io->pages[i]); | ||
89 | io->num_io_pages = 0; | ||
90 | wq = to_ioend_wq(io->inode); | ||
91 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && | ||
92 | waitqueue_active(wq)) | ||
93 | wake_up_all(wq); | ||
94 | kmem_cache_free(io_end_cachep, io); | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * check a range of space and convert unwritten extents to written. | ||
99 | */ | ||
100 | int ext4_end_io_nolock(ext4_io_end_t *io) | ||
101 | { | ||
102 | struct inode *inode = io->inode; | ||
103 | loff_t offset = io->offset; | ||
104 | ssize_t size = io->size; | ||
105 | int ret = 0; | ||
106 | |||
107 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
108 | "list->prev 0x%p\n", | ||
109 | io, inode->i_ino, io->list.next, io->list.prev); | ||
110 | |||
111 | if (list_empty(&io->list)) | ||
112 | return ret; | ||
113 | |||
114 | if (!(io->flag & EXT4_IO_END_UNWRITTEN)) | ||
115 | return ret; | ||
116 | |||
117 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
118 | if (ret < 0) { | ||
119 | printk(KERN_EMERG "%s: failed to convert unwritten " | ||
120 | "extents to written extents, error is %d " | ||
121 | "io is still on inode %lu aio dio list\n", | ||
122 | __func__, ret, inode->i_ino); | ||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | if (io->iocb) | ||
127 | aio_complete(io->iocb, io->result, 0); | ||
128 | /* clear the DIO AIO unwritten flag */ | ||
129 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
130 | return ret; | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
135 | */ | ||
136 | static void ext4_end_io_work(struct work_struct *work) | ||
137 | { | ||
138 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
139 | struct inode *inode = io->inode; | ||
140 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
141 | unsigned long flags; | ||
142 | int ret; | ||
143 | |||
144 | mutex_lock(&inode->i_mutex); | ||
145 | ret = ext4_end_io_nolock(io); | ||
146 | if (ret < 0) { | ||
147 | mutex_unlock(&inode->i_mutex); | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
152 | if (!list_empty(&io->list)) | ||
153 | list_del_init(&io->list); | ||
154 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
155 | mutex_unlock(&inode->i_mutex); | ||
156 | ext4_free_io_end(io); | ||
157 | } | ||
158 | |||
159 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | ||
160 | { | ||
161 | ext4_io_end_t *io = NULL; | ||
162 | |||
163 | io = kmem_cache_alloc(io_end_cachep, flags); | ||
164 | if (io) { | ||
165 | memset(io, 0, sizeof(*io)); | ||
166 | atomic_inc(&EXT4_I(inode)->i_ioend_count); | ||
167 | io->inode = inode; | ||
168 | INIT_WORK(&io->work, ext4_end_io_work); | ||
169 | INIT_LIST_HEAD(&io->list); | ||
170 | } | ||
171 | return io; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Print an buffer I/O error compatible with the fs/buffer.c. This | ||
176 | * provides compatibility with dmesg scrapers that look for a specific | ||
177 | * buffer I/O error message. We really need a unified error reporting | ||
178 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
179 | * probably not going to happen in my lifetime, due to LKML politics... | ||
180 | */ | ||
181 | static void buffer_io_error(struct buffer_head *bh) | ||
182 | { | ||
183 | char b[BDEVNAME_SIZE]; | ||
184 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
185 | bdevname(bh->b_bdev, b), | ||
186 | (unsigned long long)bh->b_blocknr); | ||
187 | } | ||
188 | |||
189 | static void ext4_end_bio(struct bio *bio, int error) | ||
190 | { | ||
191 | ext4_io_end_t *io_end = bio->bi_private; | ||
192 | struct workqueue_struct *wq; | ||
193 | struct inode *inode; | ||
194 | unsigned long flags; | ||
195 | int i; | ||
196 | |||
197 | BUG_ON(!io_end); | ||
198 | bio->bi_private = NULL; | ||
199 | bio->bi_end_io = NULL; | ||
200 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
201 | error = 0; | ||
202 | bio_put(bio); | ||
203 | |||
204 | for (i = 0; i < io_end->num_io_pages; i++) { | ||
205 | struct page *page = io_end->pages[i]->p_page; | ||
206 | struct buffer_head *bh, *head; | ||
207 | int partial_write = 0; | ||
208 | |||
209 | head = page_buffers(page); | ||
210 | if (error) | ||
211 | SetPageError(page); | ||
212 | BUG_ON(!head); | ||
213 | if (head->b_size == PAGE_CACHE_SIZE) | ||
214 | clear_buffer_dirty(head); | ||
215 | else { | ||
216 | loff_t offset; | ||
217 | loff_t io_end_offset = io_end->offset + io_end->size; | ||
218 | |||
219 | offset = (sector_t) page->index << PAGE_CACHE_SHIFT; | ||
220 | bh = head; | ||
221 | do { | ||
222 | if ((offset >= io_end->offset) && | ||
223 | (offset+bh->b_size <= io_end_offset)) { | ||
224 | if (error) | ||
225 | buffer_io_error(bh); | ||
226 | |||
227 | clear_buffer_dirty(bh); | ||
228 | } | ||
229 | if (buffer_delay(bh)) | ||
230 | partial_write = 1; | ||
231 | else if (!buffer_mapped(bh)) | ||
232 | clear_buffer_dirty(bh); | ||
233 | else if (buffer_dirty(bh)) | ||
234 | partial_write = 1; | ||
235 | offset += bh->b_size; | ||
236 | bh = bh->b_this_page; | ||
237 | } while (bh != head); | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * If this is a partial write which happened to make | ||
242 | * all buffers uptodate then we can optimize away a | ||
243 | * bogus readpage() for the next read(). Here we | ||
244 | * 'discover' whether the page went uptodate as a | ||
245 | * result of this (potentially partial) write. | ||
246 | */ | ||
247 | if (!partial_write) | ||
248 | SetPageUptodate(page); | ||
249 | |||
250 | put_io_page(io_end->pages[i]); | ||
251 | } | ||
252 | io_end->num_io_pages = 0; | ||
253 | inode = io_end->inode; | ||
254 | |||
255 | if (error) { | ||
256 | io_end->flag |= EXT4_IO_END_ERROR; | ||
257 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | ||
258 | "(offset %llu size %ld starting block %llu)", | ||
259 | inode->i_ino, | ||
260 | (unsigned long long) io_end->offset, | ||
261 | (long) io_end->size, | ||
262 | (unsigned long long) | ||
263 | bio->bi_sector >> (inode->i_blkbits - 9)); | ||
264 | } | ||
265 | |||
266 | /* Add the io_end to per-inode completed io list*/ | ||
267 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
268 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
269 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
270 | |||
271 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
272 | /* queue the work to convert unwritten extents to written */ | ||
273 | queue_work(wq, &io_end->work); | ||
274 | } | ||
275 | |||
276 | void ext4_io_submit(struct ext4_io_submit *io) | ||
277 | { | ||
278 | struct bio *bio = io->io_bio; | ||
279 | |||
280 | if (bio) { | ||
281 | bio_get(io->io_bio); | ||
282 | submit_bio(io->io_op, io->io_bio); | ||
283 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); | ||
284 | bio_put(io->io_bio); | ||
285 | } | ||
286 | io->io_bio = 0; | ||
287 | io->io_op = 0; | ||
288 | io->io_end = 0; | ||
289 | } | ||
290 | |||
291 | static int io_submit_init(struct ext4_io_submit *io, | ||
292 | struct inode *inode, | ||
293 | struct writeback_control *wbc, | ||
294 | struct buffer_head *bh) | ||
295 | { | ||
296 | ext4_io_end_t *io_end; | ||
297 | struct page *page = bh->b_page; | ||
298 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | ||
299 | struct bio *bio; | ||
300 | |||
301 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
302 | if (!io_end) | ||
303 | return -ENOMEM; | ||
304 | do { | ||
305 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
306 | nvecs >>= 1; | ||
307 | } while (bio == NULL); | ||
308 | |||
309 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | ||
310 | bio->bi_bdev = bh->b_bdev; | ||
311 | bio->bi_private = io->io_end = io_end; | ||
312 | bio->bi_end_io = ext4_end_bio; | ||
313 | |||
314 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
315 | |||
316 | io->io_bio = bio; | ||
317 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? | ||
318 | WRITE_SYNC_PLUG : WRITE); | ||
319 | io->io_next_block = bh->b_blocknr; | ||
320 | return 0; | ||
321 | } | ||
322 | |||
323 | static int io_submit_add_bh(struct ext4_io_submit *io, | ||
324 | struct ext4_io_page *io_page, | ||
325 | struct inode *inode, | ||
326 | struct writeback_control *wbc, | ||
327 | struct buffer_head *bh) | ||
328 | { | ||
329 | ext4_io_end_t *io_end; | ||
330 | int ret; | ||
331 | |||
332 | if (buffer_new(bh)) { | ||
333 | clear_buffer_new(bh); | ||
334 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | ||
335 | } | ||
336 | |||
337 | if (!buffer_mapped(bh) || buffer_delay(bh)) { | ||
338 | if (!buffer_mapped(bh)) | ||
339 | clear_buffer_dirty(bh); | ||
340 | if (io->io_bio) | ||
341 | ext4_io_submit(io); | ||
342 | return 0; | ||
343 | } | ||
344 | |||
345 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | ||
346 | submit_and_retry: | ||
347 | ext4_io_submit(io); | ||
348 | } | ||
349 | if (io->io_bio == NULL) { | ||
350 | ret = io_submit_init(io, inode, wbc, bh); | ||
351 | if (ret) | ||
352 | return ret; | ||
353 | } | ||
354 | io_end = io->io_end; | ||
355 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | ||
356 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | ||
357 | goto submit_and_retry; | ||
358 | if (buffer_uninit(bh)) | ||
359 | io->io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
360 | io->io_end->size += bh->b_size; | ||
361 | io->io_next_block++; | ||
362 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
363 | if (ret != bh->b_size) | ||
364 | goto submit_and_retry; | ||
365 | if ((io_end->num_io_pages == 0) || | ||
366 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { | ||
367 | io_end->pages[io_end->num_io_pages++] = io_page; | ||
368 | atomic_inc(&io_page->p_count); | ||
369 | } | ||
370 | return 0; | ||
371 | } | ||
372 | |||
373 | int ext4_bio_write_page(struct ext4_io_submit *io, | ||
374 | struct page *page, | ||
375 | int len, | ||
376 | struct writeback_control *wbc) | ||
377 | { | ||
378 | struct inode *inode = page->mapping->host; | ||
379 | unsigned block_start, block_end, blocksize; | ||
380 | struct ext4_io_page *io_page; | ||
381 | struct buffer_head *bh, *head; | ||
382 | int ret = 0; | ||
383 | |||
384 | blocksize = 1 << inode->i_blkbits; | ||
385 | |||
386 | BUG_ON(PageWriteback(page)); | ||
387 | set_page_writeback(page); | ||
388 | ClearPageError(page); | ||
389 | |||
390 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | ||
391 | if (!io_page) { | ||
392 | set_page_dirty(page); | ||
393 | unlock_page(page); | ||
394 | return -ENOMEM; | ||
395 | } | ||
396 | io_page->p_page = page; | ||
397 | atomic_set(&io_page->p_count, 1); | ||
398 | get_page(page); | ||
399 | |||
400 | for (bh = head = page_buffers(page), block_start = 0; | ||
401 | bh != head || !block_start; | ||
402 | block_start = block_end, bh = bh->b_this_page) { | ||
403 | block_end = block_start + blocksize; | ||
404 | if (block_start >= len) { | ||
405 | clear_buffer_dirty(bh); | ||
406 | set_buffer_uptodate(bh); | ||
407 | continue; | ||
408 | } | ||
409 | ret = io_submit_add_bh(io, io_page, inode, wbc, bh); | ||
410 | if (ret) { | ||
411 | /* | ||
412 | * We only get here on ENOMEM. Not much else | ||
413 | * we can do but mark the page as dirty, and | ||
414 | * better luck next time. | ||
415 | */ | ||
416 | set_page_dirty(page); | ||
417 | break; | ||
418 | } | ||
419 | } | ||
420 | unlock_page(page); | ||
421 | /* | ||
422 | * If the page was truncated before we could do the writeback, | ||
423 | * or we had a memory allocation error while trying to write | ||
424 | * the first buffer head, we won't have submitted any pages for | ||
425 | * I/O. In that case we need to make sure we've cleared the | ||
426 | * PageWriteback bit from the page to prevent the system from | ||
427 | * wedging later on. | ||
428 | */ | ||
429 | put_io_page(io_page); | ||
430 | return ret; | ||
431 | } | ||
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ca5c8aa00a2f..dc963929de65 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
226 | } | 226 | } |
227 | 227 | ||
228 | /* Zero out all of the reserved backup group descriptor table blocks */ | 228 | /* Zero out all of the reserved backup group descriptor table blocks */ |
229 | for (i = 0, bit = gdblocks + 1, block = start + bit; | 229 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", |
230 | i < reserved_gdb; i++, block++, bit++) { | 230 | block, sbi->s_itb_per_group); |
231 | struct buffer_head *gdb; | 231 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
232 | 232 | GFP_NOFS); | |
233 | ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); | 233 | if (err) |
234 | 234 | goto exit_bh; | |
235 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | ||
236 | goto exit_bh; | ||
237 | 235 | ||
238 | if (IS_ERR(gdb = bclean(handle, sb, block))) { | ||
239 | err = PTR_ERR(gdb); | ||
240 | goto exit_bh; | ||
241 | } | ||
242 | ext4_handle_dirty_metadata(handle, NULL, gdb); | ||
243 | ext4_set_bit(bit, bh->b_data); | ||
244 | brelse(gdb); | ||
245 | } | ||
246 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | 236 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, |
247 | input->block_bitmap - start); | 237 | input->block_bitmap - start); |
248 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | 238 | ext4_set_bit(input->block_bitmap - start, bh->b_data); |
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
251 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | 241 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); |
252 | 242 | ||
253 | /* Zero out all of the inode table blocks */ | 243 | /* Zero out all of the inode table blocks */ |
254 | for (i = 0, block = input->inode_table, bit = block - start; | 244 | block = input->inode_table; |
255 | i < sbi->s_itb_per_group; i++, bit++, block++) { | 245 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", |
256 | struct buffer_head *it; | 246 | block, sbi->s_itb_per_group); |
257 | 247 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | |
258 | ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); | 248 | if (err) |
259 | 249 | goto exit_bh; | |
260 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | ||
261 | goto exit_bh; | ||
262 | |||
263 | if (IS_ERR(it = bclean(handle, sb, block))) { | ||
264 | err = PTR_ERR(it); | ||
265 | goto exit_bh; | ||
266 | } | ||
267 | ext4_handle_dirty_metadata(handle, NULL, it); | ||
268 | brelse(it); | ||
269 | ext4_set_bit(bit, bh->b_data); | ||
270 | } | ||
271 | 250 | ||
272 | if ((err = extend_or_restart_transaction(handle, 2, bh))) | 251 | if ((err = extend_or_restart_transaction(handle, 2, bh))) |
273 | goto exit_bh; | 252 | goto exit_bh; |
274 | 253 | ||
275 | mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); | 254 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, |
255 | bh->b_data); | ||
276 | ext4_handle_dirty_metadata(handle, NULL, bh); | 256 | ext4_handle_dirty_metadata(handle, NULL, bh); |
277 | brelse(bh); | 257 | brelse(bh); |
278 | /* Mark unused entries in inode bitmap used */ | 258 | /* Mark unused entries in inode bitmap used */ |
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
283 | goto exit_journal; | 263 | goto exit_journal; |
284 | } | 264 | } |
285 | 265 | ||
286 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 266 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
287 | bh->b_data); | 267 | bh->b_data); |
288 | ext4_handle_dirty_metadata(handle, NULL, bh); | 268 | ext4_handle_dirty_metadata(handle, NULL, bh); |
289 | exit_bh: | 269 | exit_bh: |
290 | brelse(bh); | 270 | brelse(bh); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8ecc1e590303..e32195d6aac3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -40,6 +40,9 @@ | |||
40 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
41 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | 42 | ||
43 | #include <linux/kthread.h> | ||
44 | #include <linux/freezer.h> | ||
45 | |||
43 | #include "ext4.h" | 46 | #include "ext4.h" |
44 | #include "ext4_jbd2.h" | 47 | #include "ext4_jbd2.h" |
45 | #include "xattr.h" | 48 | #include "xattr.h" |
@@ -49,8 +52,11 @@ | |||
49 | #define CREATE_TRACE_POINTS | 52 | #define CREATE_TRACE_POINTS |
50 | #include <trace/events/ext4.h> | 53 | #include <trace/events/ext4.h> |
51 | 54 | ||
52 | struct proc_dir_entry *ext4_proc_root; | 55 | static struct proc_dir_entry *ext4_proc_root; |
53 | static struct kset *ext4_kset; | 56 | static struct kset *ext4_kset; |
57 | struct ext4_lazy_init *ext4_li_info; | ||
58 | struct mutex ext4_li_mtx; | ||
59 | struct ext4_features *ext4_feat; | ||
54 | 60 | ||
55 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
56 | unsigned long journal_devnum); | 62 | unsigned long journal_devnum); |
@@ -67,14 +73,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | |||
67 | static int ext4_unfreeze(struct super_block *sb); | 73 | static int ext4_unfreeze(struct super_block *sb); |
68 | static void ext4_write_super(struct super_block *sb); | 74 | static void ext4_write_super(struct super_block *sb); |
69 | static int ext4_freeze(struct super_block *sb); | 75 | static int ext4_freeze(struct super_block *sb); |
70 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, | 76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
71 | const char *dev_name, void *data, struct vfsmount *mnt); | 77 | const char *dev_name, void *data); |
78 | static void ext4_destroy_lazyinit_thread(void); | ||
79 | static void ext4_unregister_li_request(struct super_block *sb); | ||
72 | 80 | ||
73 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 81 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
74 | static struct file_system_type ext3_fs_type = { | 82 | static struct file_system_type ext3_fs_type = { |
75 | .owner = THIS_MODULE, | 83 | .owner = THIS_MODULE, |
76 | .name = "ext3", | 84 | .name = "ext3", |
77 | .get_sb = ext4_get_sb, | 85 | .mount = ext4_mount, |
78 | .kill_sb = kill_block_super, | 86 | .kill_sb = kill_block_super, |
79 | .fs_flags = FS_REQUIRES_DEV, | 87 | .fs_flags = FS_REQUIRES_DEV, |
80 | }; | 88 | }; |
@@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb) | |||
701 | struct ext4_super_block *es = sbi->s_es; | 709 | struct ext4_super_block *es = sbi->s_es; |
702 | int i, err; | 710 | int i, err; |
703 | 711 | ||
712 | ext4_unregister_li_request(sb); | ||
704 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 713 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
705 | 714 | ||
706 | flush_workqueue(sbi->dio_unwritten_wq); | 715 | flush_workqueue(sbi->dio_unwritten_wq); |
@@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb) | |||
717 | ext4_abort(sb, "Couldn't clean up the journal"); | 726 | ext4_abort(sb, "Couldn't clean up the journal"); |
718 | } | 727 | } |
719 | 728 | ||
729 | del_timer(&sbi->s_err_report); | ||
720 | ext4_release_system_zone(sb); | 730 | ext4_release_system_zone(sb); |
721 | ext4_mb_release(sb); | 731 | ext4_mb_release(sb); |
722 | ext4_ext_release(sb); | 732 | ext4_ext_release(sb); |
@@ -818,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
818 | ei->cur_aio_dio = NULL; | 828 | ei->cur_aio_dio = NULL; |
819 | ei->i_sync_tid = 0; | 829 | ei->i_sync_tid = 0; |
820 | ei->i_datasync_tid = 0; | 830 | ei->i_datasync_tid = 0; |
831 | atomic_set(&ei->i_ioend_count, 0); | ||
821 | 832 | ||
822 | return &ei->vfs_inode; | 833 | return &ei->vfs_inode; |
823 | } | 834 | } |
824 | 835 | ||
/*
 * ->drop_inode hook: defer the decision to generic_drop_inode() and
 * report the outcome through the ext4_drop_inode tracepoint.
 */
static int ext4_drop_inode(struct inode *inode)
{
	int should_drop;

	should_drop = generic_drop_inode(inode);
	trace_ext4_drop_inode(inode, should_drop);

	return should_drop;
}
843 | |||
825 | static void ext4_destroy_inode(struct inode *inode) | 844 | static void ext4_destroy_inode(struct inode *inode) |
826 | { | 845 | { |
846 | ext4_ioend_wait(inode); | ||
827 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 847 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
828 | ext4_msg(inode->i_sb, KERN_ERR, | 848 | ext4_msg(inode->i_sb, KERN_ERR, |
829 | "Inode %lu (%p): orphan list check failed!", | 849 | "Inode %lu (%p): orphan list check failed!", |
@@ -1042,6 +1062,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1042 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) | 1062 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) |
1043 | seq_puts(seq, ",block_validity"); | 1063 | seq_puts(seq, ",block_validity"); |
1044 | 1064 | ||
1065 | if (!test_opt(sb, INIT_INODE_TABLE)) | ||
1066 | seq_puts(seq, ",noinit_inode_table"); | ||
1067 | else if (sbi->s_li_wait_mult) | ||
1068 | seq_printf(seq, ",init_inode_table=%u", | ||
1069 | (unsigned) sbi->s_li_wait_mult); | ||
1070 | |||
1045 | ext4_show_quota_options(seq, sb); | 1071 | ext4_show_quota_options(seq, sb); |
1046 | 1072 | ||
1047 | return 0; | 1073 | return 0; |
@@ -1157,6 +1183,7 @@ static const struct super_operations ext4_sops = { | |||
1157 | .destroy_inode = ext4_destroy_inode, | 1183 | .destroy_inode = ext4_destroy_inode, |
1158 | .write_inode = ext4_write_inode, | 1184 | .write_inode = ext4_write_inode, |
1159 | .dirty_inode = ext4_dirty_inode, | 1185 | .dirty_inode = ext4_dirty_inode, |
1186 | .drop_inode = ext4_drop_inode, | ||
1160 | .evict_inode = ext4_evict_inode, | 1187 | .evict_inode = ext4_evict_inode, |
1161 | .put_super = ext4_put_super, | 1188 | .put_super = ext4_put_super, |
1162 | .sync_fs = ext4_sync_fs, | 1189 | .sync_fs = ext4_sync_fs, |
@@ -1177,6 +1204,7 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1177 | .destroy_inode = ext4_destroy_inode, | 1204 | .destroy_inode = ext4_destroy_inode, |
1178 | .write_inode = ext4_write_inode, | 1205 | .write_inode = ext4_write_inode, |
1179 | .dirty_inode = ext4_dirty_inode, | 1206 | .dirty_inode = ext4_dirty_inode, |
1207 | .drop_inode = ext4_drop_inode, | ||
1180 | .evict_inode = ext4_evict_inode, | 1208 | .evict_inode = ext4_evict_inode, |
1181 | .write_super = ext4_write_super, | 1209 | .write_super = ext4_write_super, |
1182 | .put_super = ext4_put_super, | 1210 | .put_super = ext4_put_super, |
@@ -1216,6 +1244,7 @@ enum { | |||
1216 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1244 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1217 | Opt_dioread_nolock, Opt_dioread_lock, | 1245 | Opt_dioread_nolock, Opt_dioread_lock, |
1218 | Opt_discard, Opt_nodiscard, | 1246 | Opt_discard, Opt_nodiscard, |
1247 | Opt_init_inode_table, Opt_noinit_inode_table, | ||
1219 | }; | 1248 | }; |
1220 | 1249 | ||
1221 | static const match_table_t tokens = { | 1250 | static const match_table_t tokens = { |
@@ -1286,6 +1315,9 @@ static const match_table_t tokens = { | |||
1286 | {Opt_dioread_lock, "dioread_lock"}, | 1315 | {Opt_dioread_lock, "dioread_lock"}, |
1287 | {Opt_discard, "discard"}, | 1316 | {Opt_discard, "discard"}, |
1288 | {Opt_nodiscard, "nodiscard"}, | 1317 | {Opt_nodiscard, "nodiscard"}, |
1318 | {Opt_init_inode_table, "init_itable=%u"}, | ||
1319 | {Opt_init_inode_table, "init_itable"}, | ||
1320 | {Opt_noinit_inode_table, "noinit_itable"}, | ||
1289 | {Opt_err, NULL}, | 1321 | {Opt_err, NULL}, |
1290 | }; | 1322 | }; |
1291 | 1323 | ||
@@ -1756,6 +1788,20 @@ set_qf_format: | |||
1756 | case Opt_dioread_lock: | 1788 | case Opt_dioread_lock: |
1757 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | 1789 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); |
1758 | break; | 1790 | break; |
1791 | case Opt_init_inode_table: | ||
1792 | set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
1793 | if (args[0].from) { | ||
1794 | if (match_int(&args[0], &option)) | ||
1795 | return 0; | ||
1796 | } else | ||
1797 | option = EXT4_DEF_LI_WAIT_MULT; | ||
1798 | if (option < 0) | ||
1799 | return 0; | ||
1800 | sbi->s_li_wait_mult = option; | ||
1801 | break; | ||
1802 | case Opt_noinit_inode_table: | ||
1803 | clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
1804 | break; | ||
1759 | default: | 1805 | default: |
1760 | ext4_msg(sb, KERN_ERR, | 1806 | ext4_msg(sb, KERN_ERR, |
1761 | "Unrecognized mount option \"%s\" " | 1807 | "Unrecognized mount option \"%s\" " |
@@ -1939,7 +1985,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, | |||
1939 | } | 1985 | } |
1940 | 1986 | ||
1941 | /* Called at mount-time, super-block is locked */ | 1987 | /* Called at mount-time, super-block is locked */ |
1942 | static int ext4_check_descriptors(struct super_block *sb) | 1988 | static int ext4_check_descriptors(struct super_block *sb, |
1989 | ext4_group_t *first_not_zeroed) | ||
1943 | { | 1990 | { |
1944 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1991 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1945 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); | 1992 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); |
@@ -1948,7 +1995,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1948 | ext4_fsblk_t inode_bitmap; | 1995 | ext4_fsblk_t inode_bitmap; |
1949 | ext4_fsblk_t inode_table; | 1996 | ext4_fsblk_t inode_table; |
1950 | int flexbg_flag = 0; | 1997 | int flexbg_flag = 0; |
1951 | ext4_group_t i; | 1998 | ext4_group_t i, grp = sbi->s_groups_count; |
1952 | 1999 | ||
1953 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 2000 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1954 | flexbg_flag = 1; | 2001 | flexbg_flag = 1; |
@@ -1964,6 +2011,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1964 | last_block = first_block + | 2011 | last_block = first_block + |
1965 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2012 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); |
1966 | 2013 | ||
2014 | if ((grp == sbi->s_groups_count) && | ||
2015 | !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2016 | grp = i; | ||
2017 | |||
1967 | block_bitmap = ext4_block_bitmap(sb, gdp); | 2018 | block_bitmap = ext4_block_bitmap(sb, gdp); |
1968 | if (block_bitmap < first_block || block_bitmap > last_block) { | 2019 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1969 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2020 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
@@ -2001,6 +2052,8 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
2001 | if (!flexbg_flag) | 2052 | if (!flexbg_flag) |
2002 | first_block += EXT4_BLOCKS_PER_GROUP(sb); | 2053 | first_block += EXT4_BLOCKS_PER_GROUP(sb); |
2003 | } | 2054 | } |
2055 | if (NULL != first_not_zeroed) | ||
2056 | *first_not_zeroed = grp; | ||
2004 | 2057 | ||
2005 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 2058 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); |
2006 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); | 2059 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
@@ -2373,6 +2426,7 @@ static struct ext4_attr ext4_attr_##_name = { \ | |||
2373 | #define EXT4_ATTR(name, mode, show, store) \ | 2426 | #define EXT4_ATTR(name, mode, show, store) \ |
2374 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2427 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
2375 | 2428 | ||
2429 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) | ||
2376 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) | 2430 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) |
2377 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) | 2431 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) |
2378 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2432 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
@@ -2409,6 +2463,16 @@ static struct attribute *ext4_attrs[] = { | |||
2409 | NULL, | 2463 | NULL, |
2410 | }; | 2464 | }; |
2411 | 2465 | ||
2466 | /* Features this copy of ext4 supports; EXT4_INFO_ATTR declares each one as a 0444 attribute with no show/store handler */ | ||
2467 | EXT4_INFO_ATTR(lazy_itable_init); | ||
2468 | EXT4_INFO_ATTR(batched_discard); | ||
2469 | |||
/* NULL-terminated list of the feature attributes declared above. */
2470 | static struct attribute *ext4_feat_attrs[] = { | ||
2471 | ATTR_LIST(lazy_itable_init), | ||
2472 | ATTR_LIST(batched_discard), | ||
2473 | NULL, | ||
2474 | }; | ||
2475 | |||
2412 | static ssize_t ext4_attr_show(struct kobject *kobj, | 2476 | static ssize_t ext4_attr_show(struct kobject *kobj, |
2413 | struct attribute *attr, char *buf) | 2477 | struct attribute *attr, char *buf) |
2414 | { | 2478 | { |
@@ -2437,7 +2501,6 @@ static void ext4_sb_release(struct kobject *kobj) | |||
2437 | complete(&sbi->s_kobj_unregister); | 2501 | complete(&sbi->s_kobj_unregister); |
2438 | } | 2502 | } |
2439 | 2503 | ||
2440 | |||
2441 | static const struct sysfs_ops ext4_attr_ops = { | 2504 | static const struct sysfs_ops ext4_attr_ops = { |
2442 | .show = ext4_attr_show, | 2505 | .show = ext4_attr_show, |
2443 | .store = ext4_attr_store, | 2506 | .store = ext4_attr_store, |
@@ -2449,6 +2512,17 @@ static struct kobj_type ext4_ktype = { | |||
2449 | .release = ext4_sb_release, | 2512 | .release = ext4_sb_release, |
2450 | }; | 2513 | }; |
2451 | 2514 | ||
2515 | static void ext4_feat_release(struct kobject *kobj) | ||
2516 | { | ||
2517 | complete(&ext4_feat->f_kobj_unregister); | ||
2518 | } | ||
2519 | |||
/*
 * kobject type for the feature-advertisement object: exposes
 * ext4_feat_attrs through the shared ext4_attr_ops and signals
 * completion of teardown via ext4_feat_release().
 */
2520 | static struct kobj_type ext4_feat_ktype = { | ||
2521 | .default_attrs = ext4_feat_attrs, | ||
2522 | .sysfs_ops = &ext4_attr_ops, | ||
2523 | .release = ext4_feat_release, | ||
2524 | }; | ||
2525 | |||
2452 | /* | 2526 | /* |
2453 | * Check whether this filesystem can be mounted based on | 2527 | * Check whether this filesystem can be mounted based on |
2454 | * the features present and the RDONLY/RDWR mount requested. | 2528 | * the features present and the RDONLY/RDWR mount requested. |
@@ -2539,6 +2613,368 @@ static void print_daily_error_info(unsigned long arg) | |||
2539 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ | 2613 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ |
2540 | } | 2614 | } |
2541 | 2615 | ||
/*
 * Timer callback: @data carries a task_struct pointer cast to
 * unsigned long; wake that task up.
 */
static void ext4_lazyinode_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2621 | |||
2622 | /* Find next suitable group and run ext4_init_inode_table */ | ||
2623 | static int ext4_run_li_request(struct ext4_li_request *elr) | ||
2624 | { | ||
2625 | struct ext4_group_desc *gdp = NULL; | ||
2626 | ext4_group_t group, ngroups; | ||
2627 | struct super_block *sb; | ||
2628 | unsigned long timeout = 0; | ||
2629 | int ret = 0; | ||
2630 | |||
2631 | sb = elr->lr_super; | ||
2632 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
2633 | |||
2634 | for (group = elr->lr_next_group; group < ngroups; group++) { | ||
2635 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
2636 | if (!gdp) { | ||
2637 | ret = 1; | ||
2638 | break; | ||
2639 | } | ||
2640 | |||
2641 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2642 | break; | ||
2643 | } | ||
2644 | |||
2645 | if (group == ngroups) | ||
2646 | ret = 1; | ||
2647 | |||
2648 | if (!ret) { | ||
2649 | timeout = jiffies; | ||
2650 | ret = ext4_init_inode_table(sb, group, | ||
2651 | elr->lr_timeout ? 0 : 1); | ||
2652 | if (elr->lr_timeout == 0) { | ||
2653 | timeout = jiffies - timeout; | ||
2654 | if (elr->lr_sbi->s_li_wait_mult) | ||
2655 | timeout *= elr->lr_sbi->s_li_wait_mult; | ||
2656 | else | ||
2657 | timeout *= 20; | ||
2658 | elr->lr_timeout = timeout; | ||
2659 | } | ||
2660 | elr->lr_next_sched = jiffies + elr->lr_timeout; | ||
2661 | elr->lr_next_group = group + 1; | ||
2662 | } | ||
2663 | |||
2664 | return ret; | ||
2665 | } | ||
2666 | |||
2667 | /* | ||
2668 | * Remove lr_request from the list_request and free the | ||
2669 | * request tructure. Should be called with li_list_mtx held | ||
2670 | */ | ||
2671 | static void ext4_remove_li_request(struct ext4_li_request *elr) | ||
2672 | { | ||
2673 | struct ext4_sb_info *sbi; | ||
2674 | |||
2675 | if (!elr) | ||
2676 | return; | ||
2677 | |||
2678 | sbi = elr->lr_sbi; | ||
2679 | |||
2680 | list_del(&elr->lr_request); | ||
2681 | sbi->s_li_request = NULL; | ||
2682 | kfree(elr); | ||
2683 | } | ||
2684 | |||
2685 | static void ext4_unregister_li_request(struct super_block *sb) | ||
2686 | { | ||
2687 | struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; | ||
2688 | |||
2689 | if (!ext4_li_info) | ||
2690 | return; | ||
2691 | |||
2692 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2693 | ext4_remove_li_request(elr); | ||
2694 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2695 | } | ||
2696 | |||
2697 | /* | ||
2698 | * This is the function where ext4lazyinit thread lives. It walks | ||
2699 | * through the request list searching for next scheduled filesystem. | ||
2700 | * When such a fs is found, run the lazy initialization request | ||
2701 | * (ext4_rn_li_request) and keep track of the time spend in this | ||
2702 | * function. Based on that time we compute next schedule time of | ||
2703 | * the request. When walking through the list is complete, compute | ||
2704 | * next waking time and put itself into sleep. | ||
2705 | */ | ||
2706 | static int ext4_lazyinit_thread(void *arg) | ||
2707 | { | ||
2708 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; | ||
2709 | struct list_head *pos, *n; | ||
2710 | struct ext4_li_request *elr; | ||
2711 | unsigned long next_wakeup; | ||
2712 | DEFINE_WAIT(wait); | ||
2713 | |||
2714 | BUG_ON(NULL == eli); | ||
2715 | |||
2716 | eli->li_timer.data = (unsigned long)current; | ||
2717 | eli->li_timer.function = ext4_lazyinode_timeout; | ||
2718 | |||
2719 | eli->li_task = current; | ||
2720 | wake_up(&eli->li_wait_task); | ||
2721 | |||
2722 | cont_thread: | ||
2723 | while (true) { | ||
2724 | next_wakeup = MAX_JIFFY_OFFSET; | ||
2725 | |||
2726 | mutex_lock(&eli->li_list_mtx); | ||
2727 | if (list_empty(&eli->li_request_list)) { | ||
2728 | mutex_unlock(&eli->li_list_mtx); | ||
2729 | goto exit_thread; | ||
2730 | } | ||
2731 | |||
2732 | list_for_each_safe(pos, n, &eli->li_request_list) { | ||
2733 | elr = list_entry(pos, struct ext4_li_request, | ||
2734 | lr_request); | ||
2735 | |||
2736 | if (time_after_eq(jiffies, elr->lr_next_sched)) { | ||
2737 | if (ext4_run_li_request(elr) != 0) { | ||
2738 | /* error, remove the lazy_init job */ | ||
2739 | ext4_remove_li_request(elr); | ||
2740 | continue; | ||
2741 | } | ||
2742 | } | ||
2743 | |||
2744 | if (time_before(elr->lr_next_sched, next_wakeup)) | ||
2745 | next_wakeup = elr->lr_next_sched; | ||
2746 | } | ||
2747 | mutex_unlock(&eli->li_list_mtx); | ||
2748 | |||
2749 | if (freezing(current)) | ||
2750 | refrigerator(); | ||
2751 | |||
2752 | if ((time_after_eq(jiffies, next_wakeup)) || | ||
2753 | (MAX_JIFFY_OFFSET == next_wakeup)) { | ||
2754 | cond_resched(); | ||
2755 | continue; | ||
2756 | } | ||
2757 | |||
2758 | eli->li_timer.expires = next_wakeup; | ||
2759 | add_timer(&eli->li_timer); | ||
2760 | prepare_to_wait(&eli->li_wait_daemon, &wait, | ||
2761 | TASK_INTERRUPTIBLE); | ||
2762 | if (time_before(jiffies, next_wakeup)) | ||
2763 | schedule(); | ||
2764 | finish_wait(&eli->li_wait_daemon, &wait); | ||
2765 | } | ||
2766 | |||
2767 | exit_thread: | ||
2768 | /* | ||
2769 | * It looks like the request list is empty, but we need | ||
2770 | * to check it under the li_list_mtx lock, to prevent any | ||
2771 | * additions into it, and of course we should lock ext4_li_mtx | ||
2772 | * to atomically free the list and ext4_li_info, because at | ||
2773 | * this point another ext4 filesystem could be registering | ||
2774 | * new one. | ||
2775 | */ | ||
2776 | mutex_lock(&ext4_li_mtx); | ||
2777 | mutex_lock(&eli->li_list_mtx); | ||
2778 | if (!list_empty(&eli->li_request_list)) { | ||
2779 | mutex_unlock(&eli->li_list_mtx); | ||
2780 | mutex_unlock(&ext4_li_mtx); | ||
2781 | goto cont_thread; | ||
2782 | } | ||
2783 | mutex_unlock(&eli->li_list_mtx); | ||
2784 | del_timer_sync(&ext4_li_info->li_timer); | ||
2785 | eli->li_task = NULL; | ||
2786 | wake_up(&eli->li_wait_task); | ||
2787 | |||
2788 | kfree(ext4_li_info); | ||
2789 | ext4_li_info = NULL; | ||
2790 | mutex_unlock(&ext4_li_mtx); | ||
2791 | |||
2792 | return 0; | ||
2793 | } | ||
2794 | |||
2795 | static void ext4_clear_request_list(void) | ||
2796 | { | ||
2797 | struct list_head *pos, *n; | ||
2798 | struct ext4_li_request *elr; | ||
2799 | |||
2800 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2801 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { | ||
2802 | elr = list_entry(pos, struct ext4_li_request, | ||
2803 | lr_request); | ||
2804 | ext4_remove_li_request(elr); | ||
2805 | } | ||
2806 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2807 | } | ||
2808 | |||
2809 | static int ext4_run_lazyinit_thread(void) | ||
2810 | { | ||
2811 | struct task_struct *t; | ||
2812 | |||
2813 | t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); | ||
2814 | if (IS_ERR(t)) { | ||
2815 | int err = PTR_ERR(t); | ||
2816 | ext4_clear_request_list(); | ||
2817 | del_timer_sync(&ext4_li_info->li_timer); | ||
2818 | kfree(ext4_li_info); | ||
2819 | ext4_li_info = NULL; | ||
2820 | printk(KERN_CRIT "EXT4: error %d creating inode table " | ||
2821 | "initialization thread\n", | ||
2822 | err); | ||
2823 | return err; | ||
2824 | } | ||
2825 | ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; | ||
2826 | |||
2827 | wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL); | ||
2828 | return 0; | ||
2829 | } | ||
2830 | |||
2831 | /* | ||
2832 | * Check whether it make sense to run itable init. thread or not. | ||
2833 | * If there is at least one uninitialized inode table, return | ||
2834 | * corresponding group number, else the loop goes through all | ||
2835 | * groups and return total number of groups. | ||
2836 | */ | ||
2837 | static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) | ||
2838 | { | ||
2839 | ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; | ||
2840 | struct ext4_group_desc *gdp = NULL; | ||
2841 | |||
2842 | for (group = 0; group < ngroups; group++) { | ||
2843 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
2844 | if (!gdp) | ||
2845 | continue; | ||
2846 | |||
2847 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2848 | break; | ||
2849 | } | ||
2850 | |||
2851 | return group; | ||
2852 | } | ||
2853 | |||
2854 | static int ext4_li_info_new(void) | ||
2855 | { | ||
2856 | struct ext4_lazy_init *eli = NULL; | ||
2857 | |||
2858 | eli = kzalloc(sizeof(*eli), GFP_KERNEL); | ||
2859 | if (!eli) | ||
2860 | return -ENOMEM; | ||
2861 | |||
2862 | eli->li_task = NULL; | ||
2863 | INIT_LIST_HEAD(&eli->li_request_list); | ||
2864 | mutex_init(&eli->li_list_mtx); | ||
2865 | |||
2866 | init_waitqueue_head(&eli->li_wait_daemon); | ||
2867 | init_waitqueue_head(&eli->li_wait_task); | ||
2868 | init_timer(&eli->li_timer); | ||
2869 | eli->li_state |= EXT4_LAZYINIT_QUIT; | ||
2870 | |||
2871 | ext4_li_info = eli; | ||
2872 | |||
2873 | return 0; | ||
2874 | } | ||
2875 | |||
2876 | static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, | ||
2877 | ext4_group_t start) | ||
2878 | { | ||
2879 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2880 | struct ext4_li_request *elr; | ||
2881 | unsigned long rnd; | ||
2882 | |||
2883 | elr = kzalloc(sizeof(*elr), GFP_KERNEL); | ||
2884 | if (!elr) | ||
2885 | return NULL; | ||
2886 | |||
2887 | elr->lr_super = sb; | ||
2888 | elr->lr_sbi = sbi; | ||
2889 | elr->lr_next_group = start; | ||
2890 | |||
2891 | /* | ||
2892 | * Randomize first schedule time of the request to | ||
2893 | * spread the inode table initialization requests | ||
2894 | * better. | ||
2895 | */ | ||
2896 | get_random_bytes(&rnd, sizeof(rnd)); | ||
2897 | elr->lr_next_sched = jiffies + (unsigned long)rnd % | ||
2898 | (EXT4_DEF_LI_MAX_START_DELAY * HZ); | ||
2899 | |||
2900 | return elr; | ||
2901 | } | ||
2902 | |||
2903 | static int ext4_register_li_request(struct super_block *sb, | ||
2904 | ext4_group_t first_not_zeroed) | ||
2905 | { | ||
2906 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2907 | struct ext4_li_request *elr; | ||
2908 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
2909 | int ret; | ||
2910 | |||
2911 | if (sbi->s_li_request != NULL) | ||
2912 | return 0; | ||
2913 | |||
2914 | if (first_not_zeroed == ngroups || | ||
2915 | (sb->s_flags & MS_RDONLY) || | ||
2916 | !test_opt(sb, INIT_INODE_TABLE)) { | ||
2917 | sbi->s_li_request = NULL; | ||
2918 | return 0; | ||
2919 | } | ||
2920 | |||
2921 | if (first_not_zeroed == ngroups) { | ||
2922 | sbi->s_li_request = NULL; | ||
2923 | return 0; | ||
2924 | } | ||
2925 | |||
2926 | elr = ext4_li_request_new(sb, first_not_zeroed); | ||
2927 | if (!elr) | ||
2928 | return -ENOMEM; | ||
2929 | |||
2930 | mutex_lock(&ext4_li_mtx); | ||
2931 | |||
2932 | if (NULL == ext4_li_info) { | ||
2933 | ret = ext4_li_info_new(); | ||
2934 | if (ret) | ||
2935 | goto out; | ||
2936 | } | ||
2937 | |||
2938 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2939 | list_add(&elr->lr_request, &ext4_li_info->li_request_list); | ||
2940 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2941 | |||
2942 | sbi->s_li_request = elr; | ||
2943 | |||
2944 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { | ||
2945 | ret = ext4_run_lazyinit_thread(); | ||
2946 | if (ret) | ||
2947 | goto out; | ||
2948 | } | ||
2949 | out: | ||
2950 | mutex_unlock(&ext4_li_mtx); | ||
2951 | if (ret) | ||
2952 | kfree(elr); | ||
2953 | return ret; | ||
2954 | } | ||
2955 | |||
2956 | /* | ||
2957 | * We do not need to lock anything since this is called on | ||
2958 | * module unload. | ||
2959 | */ | ||
2960 | static void ext4_destroy_lazyinit_thread(void) | ||
2961 | { | ||
2962 | /* | ||
2963 | * If thread exited earlier | ||
2964 | * there's nothing to be done. | ||
2965 | */ | ||
2966 | if (!ext4_li_info) | ||
2967 | return; | ||
2968 | |||
2969 | ext4_clear_request_list(); | ||
2970 | |||
2971 | while (ext4_li_info->li_task) { | ||
2972 | wake_up(&ext4_li_info->li_wait_daemon); | ||
2973 | wait_event(ext4_li_info->li_wait_task, | ||
2974 | ext4_li_info->li_task == NULL); | ||
2975 | } | ||
2976 | } | ||
2977 | |||
2542 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2978 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2543 | __releases(kernel_lock) | 2979 | __releases(kernel_lock) |
2544 | __acquires(kernel_lock) | 2980 | __acquires(kernel_lock) |
@@ -2564,6 +3000,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2564 | __u64 blocks_count; | 3000 | __u64 blocks_count; |
2565 | int err; | 3001 | int err; |
2566 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 3002 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
3003 | ext4_group_t first_not_zeroed; | ||
2567 | 3004 | ||
2568 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 3005 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
2569 | if (!sbi) | 3006 | if (!sbi) |
@@ -2624,6 +3061,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2624 | 3061 | ||
2625 | /* Set defaults before we parse the mount options */ | 3062 | /* Set defaults before we parse the mount options */ |
2626 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 3063 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
3064 | set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
2627 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 3065 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
2628 | set_opt(sbi->s_mount_opt, DEBUG); | 3066 | set_opt(sbi->s_mount_opt, DEBUG); |
2629 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { | 3067 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { |
@@ -2826,13 +3264,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2826 | * Test whether we have more sectors than will fit in sector_t, | 3264 | * Test whether we have more sectors than will fit in sector_t, |
2827 | * and whether the max offset is addressable by the page cache. | 3265 | * and whether the max offset is addressable by the page cache. |
2828 | */ | 3266 | */ |
2829 | ret = generic_check_addressable(sb->s_blocksize_bits, | 3267 | err = generic_check_addressable(sb->s_blocksize_bits, |
2830 | ext4_blocks_count(es)); | 3268 | ext4_blocks_count(es)); |
2831 | if (ret) { | 3269 | if (err) { |
2832 | ext4_msg(sb, KERN_ERR, "filesystem" | 3270 | ext4_msg(sb, KERN_ERR, "filesystem" |
2833 | " too large to mount safely on this system"); | 3271 | " too large to mount safely on this system"); |
2834 | if (sizeof(sector_t) < 8) | 3272 | if (sizeof(sector_t) < 8) |
2835 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 3273 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
3274 | ret = err; | ||
2836 | goto failed_mount; | 3275 | goto failed_mount; |
2837 | } | 3276 | } |
2838 | 3277 | ||
@@ -2901,7 +3340,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2901 | goto failed_mount2; | 3340 | goto failed_mount2; |
2902 | } | 3341 | } |
2903 | } | 3342 | } |
2904 | if (!ext4_check_descriptors(sb)) { | 3343 | if (!ext4_check_descriptors(sb, &first_not_zeroed)) { |
2905 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); | 3344 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
2906 | goto failed_mount2; | 3345 | goto failed_mount2; |
2907 | } | 3346 | } |
@@ -2917,6 +3356,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2917 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 3356 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
2918 | spin_lock_init(&sbi->s_next_gen_lock); | 3357 | spin_lock_init(&sbi->s_next_gen_lock); |
2919 | 3358 | ||
3359 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
3360 | ext4_count_free_blocks(sb)); | ||
3361 | if (!err) { | ||
3362 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
3363 | ext4_count_free_inodes(sb)); | ||
3364 | } | ||
3365 | if (!err) { | ||
3366 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
3367 | ext4_count_dirs(sb)); | ||
3368 | } | ||
3369 | if (!err) { | ||
3370 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
3371 | } | ||
3372 | if (err) { | ||
3373 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
3374 | goto failed_mount3; | ||
3375 | } | ||
3376 | |||
2920 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3377 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2921 | sbi->s_max_writeback_mb_bump = 128; | 3378 | sbi->s_max_writeback_mb_bump = 128; |
2922 | 3379 | ||
@@ -3015,22 +3472,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3015 | } | 3472 | } |
3016 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 3473 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
3017 | 3474 | ||
3018 | no_journal: | 3475 | /* |
3019 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 3476 | * The journal may have updated the bg summary counts, so we |
3020 | ext4_count_free_blocks(sb)); | 3477 | * need to update the global counters. |
3021 | if (!err) | 3478 | */ |
3022 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | 3479 | percpu_counter_set(&sbi->s_freeblocks_counter, |
3023 | ext4_count_free_inodes(sb)); | 3480 | ext4_count_free_blocks(sb)); |
3024 | if (!err) | 3481 | percpu_counter_set(&sbi->s_freeinodes_counter, |
3025 | err = percpu_counter_init(&sbi->s_dirs_counter, | 3482 | ext4_count_free_inodes(sb)); |
3026 | ext4_count_dirs(sb)); | 3483 | percpu_counter_set(&sbi->s_dirs_counter, |
3027 | if (!err) | 3484 | ext4_count_dirs(sb)); |
3028 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | 3485 | percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); |
3029 | if (err) { | ||
3030 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
3031 | goto failed_mount_wq; | ||
3032 | } | ||
3033 | 3486 | ||
3487 | no_journal: | ||
3034 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); | 3488 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); |
3035 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3489 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
3036 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3490 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
@@ -3122,6 +3576,10 @@ no_journal: | |||
3122 | goto failed_mount4; | 3576 | goto failed_mount4; |
3123 | } | 3577 | } |
3124 | 3578 | ||
3579 | err = ext4_register_li_request(sb, first_not_zeroed); | ||
3580 | if (err) | ||
3581 | goto failed_mount4; | ||
3582 | |||
3125 | sbi->s_kobj.kset = ext4_kset; | 3583 | sbi->s_kobj.kset = ext4_kset; |
3126 | init_completion(&sbi->s_kobj_unregister); | 3584 | init_completion(&sbi->s_kobj_unregister); |
3127 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, | 3585 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, |
@@ -3176,10 +3634,6 @@ failed_mount_wq: | |||
3176 | jbd2_journal_destroy(sbi->s_journal); | 3634 | jbd2_journal_destroy(sbi->s_journal); |
3177 | sbi->s_journal = NULL; | 3635 | sbi->s_journal = NULL; |
3178 | } | 3636 | } |
3179 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3180 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3181 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3182 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3183 | failed_mount3: | 3637 | failed_mount3: |
3184 | if (sbi->s_flex_groups) { | 3638 | if (sbi->s_flex_groups) { |
3185 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 3639 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
@@ -3187,6 +3641,10 @@ failed_mount3: | |||
3187 | else | 3641 | else |
3188 | kfree(sbi->s_flex_groups); | 3642 | kfree(sbi->s_flex_groups); |
3189 | } | 3643 | } |
3644 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3645 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3646 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3647 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3190 | failed_mount2: | 3648 | failed_mount2: |
3191 | for (i = 0; i < db_count; i++) | 3649 | for (i = 0; i < db_count; i++) |
3192 | brelse(sbi->s_group_desc[i]); | 3650 | brelse(sbi->s_group_desc[i]); |
@@ -3461,7 +3919,7 @@ static int ext4_load_journal(struct super_block *sb, | |||
3461 | EXT4_SB(sb)->s_journal = journal; | 3919 | EXT4_SB(sb)->s_journal = journal; |
3462 | ext4_clear_journal_err(sb, es); | 3920 | ext4_clear_journal_err(sb, es); |
3463 | 3921 | ||
3464 | if (journal_devnum && | 3922 | if (!really_read_only && journal_devnum && |
3465 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 3923 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3466 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 3924 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
3467 | 3925 | ||
@@ -3515,9 +3973,10 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3515 | es->s_kbytes_written = | 3973 | es->s_kbytes_written = |
3516 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | 3974 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); |
3517 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 3975 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( |
3518 | &EXT4_SB(sb)->s_freeblocks_counter)); | 3976 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3519 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | 3977 | es->s_free_inodes_count = |
3520 | &EXT4_SB(sb)->s_freeinodes_counter)); | 3978 | cpu_to_le32(percpu_counter_sum_positive( |
3979 | &EXT4_SB(sb)->s_freeinodes_counter)); | ||
3521 | sb->s_dirt = 0; | 3980 | sb->s_dirt = 0; |
3522 | BUFFER_TRACE(sbh, "marking dirty"); | 3981 | BUFFER_TRACE(sbh, "marking dirty"); |
3523 | mark_buffer_dirty(sbh); | 3982 | mark_buffer_dirty(sbh); |
@@ -3835,6 +4294,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3835 | enable_quota = 1; | 4294 | enable_quota = 1; |
3836 | } | 4295 | } |
3837 | } | 4296 | } |
4297 | |||
4298 | /* | ||
4299 | * Reinitialize lazy itable initialization thread based on | ||
4300 | * current settings | ||
4301 | */ | ||
4302 | if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) | ||
4303 | ext4_unregister_li_request(sb); | ||
4304 | else { | ||
4305 | ext4_group_t first_not_zeroed; | ||
4306 | first_not_zeroed = ext4_has_uninit_itable(sb); | ||
4307 | ext4_register_li_request(sb, first_not_zeroed); | ||
4308 | } | ||
4309 | |||
3838 | ext4_setup_system_zone(sb); | 4310 | ext4_setup_system_zone(sb); |
3839 | if (sbi->s_journal == NULL) | 4311 | if (sbi->s_journal == NULL) |
3840 | ext4_commit_super(sb, 1); | 4312 | ext4_commit_super(sb, 1); |
@@ -4105,12 +4577,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
4105 | 4577 | ||
4106 | static int ext4_quota_off(struct super_block *sb, int type) | 4578 | static int ext4_quota_off(struct super_block *sb, int type) |
4107 | { | 4579 | { |
4108 | /* Force all delayed allocation blocks to be allocated */ | 4580 | /* Force all delayed allocation blocks to be allocated. |
4109 | if (test_opt(sb, DELALLOC)) { | 4581 | * Caller already holds s_umount sem */ |
4110 | down_read(&sb->s_umount); | 4582 | if (test_opt(sb, DELALLOC)) |
4111 | sync_filesystem(sb); | 4583 | sync_filesystem(sb); |
4112 | up_read(&sb->s_umount); | ||
4113 | } | ||
4114 | 4584 | ||
4115 | return dquot_quota_off(sb, type); | 4585 | return dquot_quota_off(sb, type); |
4116 | } | 4586 | } |
@@ -4216,17 +4686,17 @@ out: | |||
4216 | 4686 | ||
4217 | #endif | 4687 | #endif |
4218 | 4688 | ||
4219 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, | 4689 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
4220 | const char *dev_name, void *data, struct vfsmount *mnt) | 4690 | const char *dev_name, void *data) |
4221 | { | 4691 | { |
4222 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); | 4692 | return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); |
4223 | } | 4693 | } |
4224 | 4694 | ||
4225 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 4695 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
4226 | static struct file_system_type ext2_fs_type = { | 4696 | static struct file_system_type ext2_fs_type = { |
4227 | .owner = THIS_MODULE, | 4697 | .owner = THIS_MODULE, |
4228 | .name = "ext2", | 4698 | .name = "ext2", |
4229 | .get_sb = ext4_get_sb, | 4699 | .mount = ext4_mount, |
4230 | .kill_sb = kill_block_super, | 4700 | .kill_sb = kill_block_super, |
4231 | .fs_flags = FS_REQUIRES_DEV, | 4701 | .fs_flags = FS_REQUIRES_DEV, |
4232 | }; | 4702 | }; |
@@ -4271,28 +4741,58 @@ static inline void unregister_as_ext3(void) { } | |||
4271 | static struct file_system_type ext4_fs_type = { | 4741 | static struct file_system_type ext4_fs_type = { |
4272 | .owner = THIS_MODULE, | 4742 | .owner = THIS_MODULE, |
4273 | .name = "ext4", | 4743 | .name = "ext4", |
4274 | .get_sb = ext4_get_sb, | 4744 | .mount = ext4_mount, |
4275 | .kill_sb = kill_block_super, | 4745 | .kill_sb = kill_block_super, |
4276 | .fs_flags = FS_REQUIRES_DEV, | 4746 | .fs_flags = FS_REQUIRES_DEV, |
4277 | }; | 4747 | }; |
4278 | 4748 | ||
4279 | static int __init init_ext4_fs(void) | 4749 | int __init ext4_init_feat_adverts(void) |
4750 | { | ||
4751 | struct ext4_features *ef; | ||
4752 | int ret = -ENOMEM; | ||
4753 | |||
4754 | ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); | ||
4755 | if (!ef) | ||
4756 | goto out; | ||
4757 | |||
4758 | ef->f_kobj.kset = ext4_kset; | ||
4759 | init_completion(&ef->f_kobj_unregister); | ||
4760 | ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, | ||
4761 | "features"); | ||
4762 | if (ret) { | ||
4763 | kfree(ef); | ||
4764 | goto out; | ||
4765 | } | ||
4766 | |||
4767 | ext4_feat = ef; | ||
4768 | ret = 0; | ||
4769 | out: | ||
4770 | return ret; | ||
4771 | } | ||
4772 | |||
4773 | static int __init ext4_init_fs(void) | ||
4280 | { | 4774 | { |
4281 | int err; | 4775 | int err; |
4282 | 4776 | ||
4283 | ext4_check_flag_values(); | 4777 | ext4_check_flag_values(); |
4284 | err = init_ext4_system_zone(); | 4778 | err = ext4_init_pageio(); |
4285 | if (err) | 4779 | if (err) |
4286 | return err; | 4780 | return err; |
4781 | err = ext4_init_system_zone(); | ||
4782 | if (err) | ||
4783 | goto out5; | ||
4287 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 4784 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
4288 | if (!ext4_kset) | 4785 | if (!ext4_kset) |
4289 | goto out4; | 4786 | goto out4; |
4290 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 4787 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
4291 | err = init_ext4_mballoc(); | 4788 | |
4789 | err = ext4_init_feat_adverts(); | ||
4790 | |||
4791 | err = ext4_init_mballoc(); | ||
4292 | if (err) | 4792 | if (err) |
4293 | goto out3; | 4793 | goto out3; |
4294 | 4794 | ||
4295 | err = init_ext4_xattr(); | 4795 | err = ext4_init_xattr(); |
4296 | if (err) | 4796 | if (err) |
4297 | goto out2; | 4797 | goto out2; |
4298 | err = init_inodecache(); | 4798 | err = init_inodecache(); |
@@ -4303,38 +4803,46 @@ static int __init init_ext4_fs(void) | |||
4303 | err = register_filesystem(&ext4_fs_type); | 4803 | err = register_filesystem(&ext4_fs_type); |
4304 | if (err) | 4804 | if (err) |
4305 | goto out; | 4805 | goto out; |
4806 | |||
4807 | ext4_li_info = NULL; | ||
4808 | mutex_init(&ext4_li_mtx); | ||
4306 | return 0; | 4809 | return 0; |
4307 | out: | 4810 | out: |
4308 | unregister_as_ext2(); | 4811 | unregister_as_ext2(); |
4309 | unregister_as_ext3(); | 4812 | unregister_as_ext3(); |
4310 | destroy_inodecache(); | 4813 | destroy_inodecache(); |
4311 | out1: | 4814 | out1: |
4312 | exit_ext4_xattr(); | 4815 | ext4_exit_xattr(); |
4313 | out2: | 4816 | out2: |
4314 | exit_ext4_mballoc(); | 4817 | ext4_exit_mballoc(); |
4315 | out3: | 4818 | out3: |
4819 | kfree(ext4_feat); | ||
4316 | remove_proc_entry("fs/ext4", NULL); | 4820 | remove_proc_entry("fs/ext4", NULL); |
4317 | kset_unregister(ext4_kset); | 4821 | kset_unregister(ext4_kset); |
4318 | out4: | 4822 | out4: |
4319 | exit_ext4_system_zone(); | 4823 | ext4_exit_system_zone(); |
4824 | out5: | ||
4825 | ext4_exit_pageio(); | ||
4320 | return err; | 4826 | return err; |
4321 | } | 4827 | } |
4322 | 4828 | ||
4323 | static void __exit exit_ext4_fs(void) | 4829 | static void __exit ext4_exit_fs(void) |
4324 | { | 4830 | { |
4831 | ext4_destroy_lazyinit_thread(); | ||
4325 | unregister_as_ext2(); | 4832 | unregister_as_ext2(); |
4326 | unregister_as_ext3(); | 4833 | unregister_as_ext3(); |
4327 | unregister_filesystem(&ext4_fs_type); | 4834 | unregister_filesystem(&ext4_fs_type); |
4328 | destroy_inodecache(); | 4835 | destroy_inodecache(); |
4329 | exit_ext4_xattr(); | 4836 | ext4_exit_xattr(); |
4330 | exit_ext4_mballoc(); | 4837 | ext4_exit_mballoc(); |
4331 | remove_proc_entry("fs/ext4", NULL); | 4838 | remove_proc_entry("fs/ext4", NULL); |
4332 | kset_unregister(ext4_kset); | 4839 | kset_unregister(ext4_kset); |
4333 | exit_ext4_system_zone(); | 4840 | ext4_exit_system_zone(); |
4841 | ext4_exit_pageio(); | ||
4334 | } | 4842 | } |
4335 | 4843 | ||
4336 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 4844 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
4337 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); | 4845 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); |
4338 | MODULE_LICENSE("GPL"); | 4846 | MODULE_LICENSE("GPL"); |
4339 | module_init(init_ext4_fs) | 4847 | module_init(ext4_init_fs) |
4340 | module_exit(exit_ext4_fs) | 4848 | module_exit(ext4_exit_fs) |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3a8cd8dff1ad..fa4b899da4b3 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, | |||
1588 | #undef BLOCK_HASH_SHIFT | 1588 | #undef BLOCK_HASH_SHIFT |
1589 | 1589 | ||
1590 | int __init | 1590 | int __init |
1591 | init_ext4_xattr(void) | 1591 | ext4_init_xattr(void) |
1592 | { | 1592 | { |
1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); | 1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); |
1594 | if (!ext4_xattr_cache) | 1594 | if (!ext4_xattr_cache) |
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void) | |||
1597 | } | 1597 | } |
1598 | 1598 | ||
1599 | void | 1599 | void |
1600 | exit_ext4_xattr(void) | 1600 | ext4_exit_xattr(void) |
1601 | { | 1601 | { |
1602 | if (ext4_xattr_cache) | 1602 | if (ext4_xattr_cache) |
1603 | mb_cache_destroy(ext4_xattr_cache); | 1603 | mb_cache_destroy(ext4_xattr_cache); |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 518e96e43905..1ef16520b950 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *); | |||
83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | 83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, |
84 | struct ext4_inode *raw_inode, handle_t *handle); | 84 | struct ext4_inode *raw_inode, handle_t *handle); |
85 | 85 | ||
86 | extern int init_ext4_xattr(void); | 86 | extern int __init ext4_init_xattr(void); |
87 | extern void exit_ext4_xattr(void); | 87 | extern void ext4_exit_xattr(void); |
88 | 88 | ||
89 | extern const struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb) | |||
121 | { | 121 | { |
122 | } | 122 | } |
123 | 123 | ||
124 | static inline int | 124 | static __init inline int |
125 | init_ext4_xattr(void) | 125 | ext4_init_xattr(void) |
126 | { | 126 | { |
127 | return 0; | 127 | return 0; |
128 | } | 128 | } |
129 | 129 | ||
130 | static inline void | 130 | static inline void |
131 | exit_ext4_xattr(void) | 131 | ext4_exit_xattr(void) |
132 | { | 132 | { |
133 | } | 133 | } |
134 | 134 | ||