diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-28 00:54:31 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-28 00:54:31 -0400 |
commit | 81280572ca6f54009edfa4deee563e8678784218 (patch) | |
tree | 4fac10bcb71261823d261e5f8551fdb16ab653ba /fs | |
parent | b83db1deb29eb4eea9bf5992431d26978e039ce6 (diff) | |
parent | a107e5a3a473a2ea62bd5af24e11b84adf1486ff (diff) |
Merge branch 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits)
ext4,jbd2: convert tracepoints to use major/minor numbers
ext4: optimize orphan_list handling for ext4_setattr
ext4: fix unbalanced mutex unlock in error path of ext4_li_request_new
ext4: fix compile error in ext4_fallocate()
ext4: move ext4_mb_{get,put}_buddy_cache_lock and make them static
ext4: rename mark_bitmap_end() to ext4_mark_bitmap_end()
ext4: move flush_completed_IO to fs/ext4/fsync.c and make it static
ext4: rename {ext,idx}_pblock and inline small extent functions
ext4: make various ext4 functions be static
ext4: rename {exit,init}_ext4_*() to ext4_{exit,init}_*()
ext4: fix kernel oops if the journal superblock has a non-zero j_errno
ext4: update writeback_index based on last page scanned
ext4: implement writeback livelock avoidance using page tagging
ext4: tidy up a void argument in inode.c
ext4: add batched_discard into ext4 feature list
ext4: Add batched discard support for ext4
fs: Add FITRIM ioctl
ext4: Use return value from sb_issue_discard()
ext4: Check return value of sb_getblk() and friends
ext4: use bio layer instead of buffer layer in mpage_da_submit_io
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/Makefile | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 5 | ||||
-rw-r--r-- | fs/ext4/block_validity.c | 7 | ||||
-rw-r--r-- | fs/ext4/dir.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 110 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 65 | ||||
-rw-r--r-- | fs/ext4/extents.c | 368 | ||||
-rw-r--r-- | fs/ext4/file.c | 44 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 83 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 135 | ||||
-rw-r--r-- | fs/ext4/inode.c | 587 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 554 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 22 | ||||
-rw-r--r-- | fs/ext4/namei.c | 63 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 430 | ||||
-rw-r--r-- | fs/ext4/resize.c | 52 | ||||
-rw-r--r-- | fs/ext4/super.c | 531 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 4 | ||||
-rw-r--r-- | fs/ext4/xattr.h | 8 | ||||
-rw-r--r-- | fs/ioctl.c | 39 | ||||
-rw-r--r-- | fs/jbd2/checkpoint.c | 10 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 12 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 4 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 1 |
25 files changed, 2189 insertions, 951 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 8867b2a1e5fe..c947e36eda6c 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_EXT4_FS) += ext4.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
6 | 6 | ||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o |
10 | 10 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index bd30799a43ed..14c3af26c671 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
171 | * less than the blocksize * 8 ( which is the size | 171 | * less than the blocksize * 8 ( which is the size |
172 | * of bitmap ), set rest of the block bitmap to 1 | 172 | * of bitmap ), set rest of the block bitmap to 1 |
173 | */ | 173 | */ |
174 | mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); | 174 | ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8, |
175 | bh->b_data); | ||
175 | } | 176 | } |
176 | return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); | 177 | return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); |
177 | } | 178 | } |
@@ -489,7 +490,7 @@ error_return: | |||
489 | * Check if filesystem has nblocks free & available for allocation. | 490 | * Check if filesystem has nblocks free & available for allocation. |
490 | * On success return 1, return 0 on failure. | 491 | * On success return 1, return 0 on failure. |
491 | */ | 492 | */ |
492 | int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) | 493 | static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) |
493 | { | 494 | { |
494 | s64 free_blocks, dirty_blocks, root_blocks; | 495 | s64 free_blocks, dirty_blocks, root_blocks; |
495 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | 496 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 3db5084db9bd..fac90f3fba80 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -29,16 +29,15 @@ struct ext4_system_zone { | |||
29 | 29 | ||
30 | static struct kmem_cache *ext4_system_zone_cachep; | 30 | static struct kmem_cache *ext4_system_zone_cachep; |
31 | 31 | ||
32 | int __init init_ext4_system_zone(void) | 32 | int __init ext4_init_system_zone(void) |
33 | { | 33 | { |
34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, | 34 | ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0); |
35 | SLAB_RECLAIM_ACCOUNT); | ||
36 | if (ext4_system_zone_cachep == NULL) | 35 | if (ext4_system_zone_cachep == NULL) |
37 | return -ENOMEM; | 36 | return -ENOMEM; |
38 | return 0; | 37 | return 0; |
39 | } | 38 | } |
40 | 39 | ||
41 | void exit_ext4_system_zone(void) | 40 | void ext4_exit_system_zone(void) |
42 | { | 41 | { |
43 | kmem_cache_destroy(ext4_system_zone_cachep); | 42 | kmem_cache_destroy(ext4_system_zone_cachep); |
44 | } | 43 | } |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 374510f72baa..ece76fb6a40c 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode, | |||
39 | struct file *filp); | 39 | struct file *filp); |
40 | 40 | ||
41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
42 | .llseek = generic_file_llseek, | 42 | .llseek = ext4_llseek, |
43 | .read = generic_read_dir, | 43 | .read = generic_read_dir, |
44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ | 44 | .readdir = ext4_readdir, /* we take BKL. needed?*/ |
45 | .unlocked_ioctl = ext4_ioctl, | 45 | .unlocked_ioctl = ext4_ioctl, |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 889ec9d5e6ad..8b5dd6369f82 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -168,7 +168,20 @@ struct mpage_da_data { | |||
168 | int pages_written; | 168 | int pages_written; |
169 | int retval; | 169 | int retval; |
170 | }; | 170 | }; |
171 | #define EXT4_IO_UNWRITTEN 0x1 | 171 | |
172 | /* | ||
173 | * Flags for ext4_io_end->flags | ||
174 | */ | ||
175 | #define EXT4_IO_END_UNWRITTEN 0x0001 | ||
176 | #define EXT4_IO_END_ERROR 0x0002 | ||
177 | |||
178 | struct ext4_io_page { | ||
179 | struct page *p_page; | ||
180 | int p_count; | ||
181 | }; | ||
182 | |||
183 | #define MAX_IO_PAGES 128 | ||
184 | |||
172 | typedef struct ext4_io_end { | 185 | typedef struct ext4_io_end { |
173 | struct list_head list; /* per-file finished IO list */ | 186 | struct list_head list; /* per-file finished IO list */ |
174 | struct inode *inode; /* file being written to */ | 187 | struct inode *inode; /* file being written to */ |
@@ -179,8 +192,18 @@ typedef struct ext4_io_end { | |||
179 | struct work_struct work; /* data work queue */ | 192 | struct work_struct work; /* data work queue */ |
180 | struct kiocb *iocb; /* iocb struct for AIO */ | 193 | struct kiocb *iocb; /* iocb struct for AIO */ |
181 | int result; /* error value for AIO */ | 194 | int result; /* error value for AIO */ |
195 | int num_io_pages; | ||
196 | struct ext4_io_page *pages[MAX_IO_PAGES]; | ||
182 | } ext4_io_end_t; | 197 | } ext4_io_end_t; |
183 | 198 | ||
199 | struct ext4_io_submit { | ||
200 | int io_op; | ||
201 | struct bio *io_bio; | ||
202 | ext4_io_end_t *io_end; | ||
203 | struct ext4_io_page *io_page; | ||
204 | sector_t io_next_block; | ||
205 | }; | ||
206 | |||
184 | /* | 207 | /* |
185 | * Special inodes numbers | 208 | * Special inodes numbers |
186 | */ | 209 | */ |
@@ -205,6 +228,7 @@ typedef struct ext4_io_end { | |||
205 | #define EXT4_MIN_BLOCK_SIZE 1024 | 228 | #define EXT4_MIN_BLOCK_SIZE 1024 |
206 | #define EXT4_MAX_BLOCK_SIZE 65536 | 229 | #define EXT4_MAX_BLOCK_SIZE 65536 |
207 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 | 230 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 |
231 | #define EXT4_MAX_BLOCK_LOG_SIZE 16 | ||
208 | #ifdef __KERNEL__ | 232 | #ifdef __KERNEL__ |
209 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) | 233 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) |
210 | #else | 234 | #else |
@@ -889,6 +913,7 @@ struct ext4_inode_info { | |||
889 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 913 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
890 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 914 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
891 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ | 915 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ |
916 | #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ | ||
892 | 917 | ||
893 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 918 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
894 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt | 919 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt |
@@ -1087,7 +1112,6 @@ struct ext4_sb_info { | |||
1087 | struct completion s_kobj_unregister; | 1112 | struct completion s_kobj_unregister; |
1088 | 1113 | ||
1089 | /* Journaling */ | 1114 | /* Journaling */ |
1090 | struct inode *s_journal_inode; | ||
1091 | struct journal_s *s_journal; | 1115 | struct journal_s *s_journal; |
1092 | struct list_head s_orphan; | 1116 | struct list_head s_orphan; |
1093 | struct mutex s_orphan_lock; | 1117 | struct mutex s_orphan_lock; |
@@ -1120,10 +1144,7 @@ struct ext4_sb_info { | |||
1120 | /* for buddy allocator */ | 1144 | /* for buddy allocator */ |
1121 | struct ext4_group_info ***s_group_info; | 1145 | struct ext4_group_info ***s_group_info; |
1122 | struct inode *s_buddy_cache; | 1146 | struct inode *s_buddy_cache; |
1123 | long s_blocks_reserved; | ||
1124 | spinlock_t s_reserve_lock; | ||
1125 | spinlock_t s_md_lock; | 1147 | spinlock_t s_md_lock; |
1126 | tid_t s_last_transaction; | ||
1127 | unsigned short *s_mb_offsets; | 1148 | unsigned short *s_mb_offsets; |
1128 | unsigned int *s_mb_maxs; | 1149 | unsigned int *s_mb_maxs; |
1129 | 1150 | ||
@@ -1141,7 +1162,6 @@ struct ext4_sb_info { | |||
1141 | unsigned long s_mb_last_start; | 1162 | unsigned long s_mb_last_start; |
1142 | 1163 | ||
1143 | /* stats for buddy allocator */ | 1164 | /* stats for buddy allocator */ |
1144 | spinlock_t s_mb_pa_lock; | ||
1145 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | 1165 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ |
1146 | atomic_t s_bal_success; /* we found long enough chunks */ | 1166 | atomic_t s_bal_success; /* we found long enough chunks */ |
1147 | atomic_t s_bal_allocated; /* in blocks */ | 1167 | atomic_t s_bal_allocated; /* in blocks */ |
@@ -1172,6 +1192,11 @@ struct ext4_sb_info { | |||
1172 | 1192 | ||
1173 | /* timer for periodic error stats printing */ | 1193 | /* timer for periodic error stats printing */ |
1174 | struct timer_list s_err_report; | 1194 | struct timer_list s_err_report; |
1195 | |||
1196 | /* Lazy inode table initialization info */ | ||
1197 | struct ext4_li_request *s_li_request; | ||
1198 | /* Wait multiplier for lazy initialization thread */ | ||
1199 | unsigned int s_li_wait_mult; | ||
1175 | }; | 1200 | }; |
1176 | 1201 | ||
1177 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1202 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1533,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
1533 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 1558 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
1534 | ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); | 1559 | ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp); |
1535 | 1560 | ||
1536 | extern struct proc_dir_entry *ext4_proc_root; | 1561 | /* |
1562 | * Timeout and state flag for lazy initialization inode thread. | ||
1563 | */ | ||
1564 | #define EXT4_DEF_LI_WAIT_MULT 10 | ||
1565 | #define EXT4_DEF_LI_MAX_START_DELAY 5 | ||
1566 | #define EXT4_LAZYINIT_QUIT 0x0001 | ||
1567 | #define EXT4_LAZYINIT_RUNNING 0x0002 | ||
1568 | |||
1569 | /* | ||
1570 | * Lazy inode table initialization info | ||
1571 | */ | ||
1572 | struct ext4_lazy_init { | ||
1573 | unsigned long li_state; | ||
1574 | |||
1575 | wait_queue_head_t li_wait_daemon; | ||
1576 | wait_queue_head_t li_wait_task; | ||
1577 | struct timer_list li_timer; | ||
1578 | struct task_struct *li_task; | ||
1579 | |||
1580 | struct list_head li_request_list; | ||
1581 | struct mutex li_list_mtx; | ||
1582 | }; | ||
1583 | |||
1584 | struct ext4_li_request { | ||
1585 | struct super_block *lr_super; | ||
1586 | struct ext4_sb_info *lr_sbi; | ||
1587 | ext4_group_t lr_next_group; | ||
1588 | struct list_head lr_request; | ||
1589 | unsigned long lr_next_sched; | ||
1590 | unsigned long lr_timeout; | ||
1591 | }; | ||
1592 | |||
1593 | struct ext4_features { | ||
1594 | struct kobject f_kobj; | ||
1595 | struct completion f_kobj_unregister; | ||
1596 | }; | ||
1537 | 1597 | ||
1538 | /* | 1598 | /* |
1539 | * Function prototypes | 1599 | * Function prototypes |
@@ -1561,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb, | |||
1561 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | 1621 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
1562 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 1622 | ext4_fsblk_t goal, unsigned long *count, int *errp); |
1563 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | 1623 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
1564 | extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | ||
1565 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | 1624 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
1566 | ext4_fsblk_t block, unsigned long count); | 1625 | ext4_fsblk_t block, unsigned long count); |
1567 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); | 1626 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
@@ -1605,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); | |||
1605 | extern unsigned long ext4_count_free_inodes(struct super_block *); | 1664 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
1606 | extern unsigned long ext4_count_dirs(struct super_block *); | 1665 | extern unsigned long ext4_count_dirs(struct super_block *); |
1607 | extern void ext4_check_inodes_bitmap(struct super_block *); | 1666 | extern void ext4_check_inodes_bitmap(struct super_block *); |
1608 | extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | 1667 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
1609 | struct buffer_head *bh, | 1668 | extern int ext4_init_inode_table(struct super_block *sb, |
1610 | ext4_group_t group, | 1669 | ext4_group_t group, int barrier); |
1611 | struct ext4_group_desc *desc); | ||
1612 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | ||
1613 | 1670 | ||
1614 | /* mballoc.c */ | 1671 | /* mballoc.c */ |
1615 | extern long ext4_mb_stats; | 1672 | extern long ext4_mb_stats; |
@@ -1620,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | |||
1620 | struct ext4_allocation_request *, int *); | 1677 | struct ext4_allocation_request *, int *); |
1621 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1678 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
1622 | extern void ext4_discard_preallocations(struct inode *); | 1679 | extern void ext4_discard_preallocations(struct inode *); |
1623 | extern int __init init_ext4_mballoc(void); | 1680 | extern int __init ext4_init_mballoc(void); |
1624 | extern void exit_ext4_mballoc(void); | 1681 | extern void ext4_exit_mballoc(void); |
1625 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | 1682 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
1626 | struct buffer_head *bh, ext4_fsblk_t block, | 1683 | struct buffer_head *bh, ext4_fsblk_t block, |
1627 | unsigned long count, int flags); | 1684 | unsigned long count, int flags); |
1628 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1685 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1629 | ext4_group_t i, struct ext4_group_desc *desc); | 1686 | ext4_group_t i, struct ext4_group_desc *desc); |
1630 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1687 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
1631 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1688 | |
1632 | ext4_group_t, int); | ||
1633 | /* inode.c */ | 1689 | /* inode.c */ |
1634 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, | 1690 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, |
1635 | ext4_lblk_t, int, int *); | 1691 | ext4_lblk_t, int, int *); |
@@ -1657,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *); | |||
1657 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1713 | extern int ext4_alloc_da_blocks(struct inode *inode); |
1658 | extern void ext4_set_aops(struct inode *inode); | 1714 | extern void ext4_set_aops(struct inode *inode); |
1659 | extern int ext4_writepage_trans_blocks(struct inode *); | 1715 | extern int ext4_writepage_trans_blocks(struct inode *); |
1660 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1661 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 1716 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
1662 | extern int ext4_block_truncate_page(handle_t *handle, | 1717 | extern int ext4_block_truncate_page(handle_t *handle, |
1663 | struct address_space *mapping, loff_t from); | 1718 | struct address_space *mapping, loff_t from); |
1664 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1719 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1665 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1720 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1666 | extern int flush_completed_IO(struct inode *inode); | ||
1667 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1721 | extern void ext4_da_update_reserve_space(struct inode *inode, |
1668 | int used, int quota_claim); | 1722 | int used, int quota_claim); |
1669 | /* ioctl.c */ | 1723 | /* ioctl.c */ |
@@ -1960,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations; | |||
1960 | /* file.c */ | 2014 | /* file.c */ |
1961 | extern const struct inode_operations ext4_file_inode_operations; | 2015 | extern const struct inode_operations ext4_file_inode_operations; |
1962 | extern const struct file_operations ext4_file_operations; | 2016 | extern const struct file_operations ext4_file_operations; |
2017 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | ||
1963 | 2018 | ||
1964 | /* namei.c */ | 2019 | /* namei.c */ |
1965 | extern const struct inode_operations ext4_dir_inode_operations; | 2020 | extern const struct inode_operations ext4_dir_inode_operations; |
@@ -1973,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1973 | /* block_validity */ | 2028 | /* block_validity */ |
1974 | extern void ext4_release_system_zone(struct super_block *sb); | 2029 | extern void ext4_release_system_zone(struct super_block *sb); |
1975 | extern int ext4_setup_system_zone(struct super_block *sb); | 2030 | extern int ext4_setup_system_zone(struct super_block *sb); |
1976 | extern int __init init_ext4_system_zone(void); | 2031 | extern int __init ext4_init_system_zone(void); |
1977 | extern void exit_ext4_system_zone(void); | 2032 | extern void ext4_exit_system_zone(void); |
1978 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | 2033 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, |
1979 | ext4_fsblk_t start_blk, | 2034 | ext4_fsblk_t start_blk, |
1980 | unsigned int count); | 2035 | unsigned int count); |
@@ -2002,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2002 | __u64 start_orig, __u64 start_donor, | 2057 | __u64 start_orig, __u64 start_donor, |
2003 | __u64 len, __u64 *moved_len); | 2058 | __u64 len, __u64 *moved_len); |
2004 | 2059 | ||
2060 | /* page-io.c */ | ||
2061 | extern int __init ext4_init_pageio(void); | ||
2062 | extern void ext4_exit_pageio(void); | ||
2063 | extern void ext4_free_io_end(ext4_io_end_t *io); | ||
2064 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | ||
2065 | extern int ext4_end_io_nolock(ext4_io_end_t *io); | ||
2066 | extern void ext4_io_submit(struct ext4_io_submit *io); | ||
2067 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | ||
2068 | struct page *page, | ||
2069 | int len, | ||
2070 | struct writeback_control *wbc); | ||
2005 | 2071 | ||
2006 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2072 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ |
2007 | enum ext4_state_bits { | 2073 | enum ext4_state_bits { |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index bdb6ce7e2eb4..28ce70fd9cd0 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) | |||
225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); | 225 | ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); |
226 | } | 226 | } |
227 | 227 | ||
228 | /* | ||
229 | * ext4_ext_pblock: | ||
230 | * combine low and high parts of physical block number into ext4_fsblk_t | ||
231 | */ | ||
232 | static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex) | ||
233 | { | ||
234 | ext4_fsblk_t block; | ||
235 | |||
236 | block = le32_to_cpu(ex->ee_start_lo); | ||
237 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
238 | return block; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * ext4_idx_pblock: | ||
243 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
244 | */ | ||
245 | static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix) | ||
246 | { | ||
247 | ext4_fsblk_t block; | ||
248 | |||
249 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
250 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
251 | return block; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * ext4_ext_store_pblock: | ||
256 | * stores a large physical block number into an extent struct, | ||
257 | * breaking it into parts | ||
258 | */ | ||
259 | static inline void ext4_ext_store_pblock(struct ext4_extent *ex, | ||
260 | ext4_fsblk_t pb) | ||
261 | { | ||
262 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
263 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
264 | 0xffff); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * ext4_idx_store_pblock: | ||
269 | * stores a large physical block number into an index struct, | ||
270 | * breaking it into parts | ||
271 | */ | ||
272 | static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, | ||
273 | ext4_fsblk_t pb) | ||
274 | { | ||
275 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
276 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & | ||
277 | 0xffff); | ||
278 | } | ||
279 | |||
228 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 280 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
229 | sector_t lblocks); | 281 | sector_t lblocks); |
230 | extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); | ||
231 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | ||
232 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | ||
233 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 282 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
234 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | 283 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
235 | int num, | 284 | int num, |
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | |||
237 | extern int ext4_can_extents_be_merged(struct inode *inode, | 286 | extern int ext4_can_extents_be_merged(struct inode *inode, |
238 | struct ext4_extent *ex1, | 287 | struct ext4_extent *ex1, |
239 | struct ext4_extent *ex2); | 288 | struct ext4_extent *ex2); |
240 | extern int ext4_ext_try_to_merge(struct inode *inode, | ||
241 | struct ext4_ext_path *path, | ||
242 | struct ext4_extent *); | ||
243 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | ||
244 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); | 289 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int); |
245 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
246 | ext_prepare_callback, void *); | ||
247 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 290 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
248 | struct ext4_ext_path *); | 291 | struct ext4_ext_path *); |
249 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | ||
250 | ext4_lblk_t *, ext4_fsblk_t *); | ||
251 | extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, | ||
252 | ext4_lblk_t *, ext4_fsblk_t *); | ||
253 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 292 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
254 | extern int ext4_ext_check_inode(struct inode *inode); | 293 | extern int ext4_ext_check_inode(struct inode *inode); |
255 | #endif /* _EXT4_EXTENTS */ | 294 | #endif /* _EXT4_EXTENTS */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 06328d3e5717..0554c48cb1fd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -44,55 +44,6 @@ | |||
44 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
45 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
46 | 46 | ||
47 | |||
48 | /* | ||
49 | * ext_pblock: | ||
50 | * combine low and high parts of physical block number into ext4_fsblk_t | ||
51 | */ | ||
52 | ext4_fsblk_t ext_pblock(struct ext4_extent *ex) | ||
53 | { | ||
54 | ext4_fsblk_t block; | ||
55 | |||
56 | block = le32_to_cpu(ex->ee_start_lo); | ||
57 | block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; | ||
58 | return block; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * idx_pblock: | ||
63 | * combine low and high parts of a leaf physical block number into ext4_fsblk_t | ||
64 | */ | ||
65 | ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) | ||
66 | { | ||
67 | ext4_fsblk_t block; | ||
68 | |||
69 | block = le32_to_cpu(ix->ei_leaf_lo); | ||
70 | block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; | ||
71 | return block; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * ext4_ext_store_pblock: | ||
76 | * stores a large physical block number into an extent struct, | ||
77 | * breaking it into parts | ||
78 | */ | ||
79 | void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) | ||
80 | { | ||
81 | ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
82 | ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * ext4_idx_store_pblock: | ||
87 | * stores a large physical block number into an index struct, | ||
88 | * breaking it into parts | ||
89 | */ | ||
90 | static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | ||
91 | { | ||
92 | ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); | ||
93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | ||
94 | } | ||
95 | |||
96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 47 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
97 | struct inode *inode, | 48 | struct inode *inode, |
98 | int needed) | 49 | int needed) |
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
169 | /* try to predict block placement */ | 120 | /* try to predict block placement */ |
170 | ex = path[depth].p_ext; | 121 | ex = path[depth].p_ext; |
171 | if (ex) | 122 | if (ex) |
172 | return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); | 123 | return (ext4_ext_pblock(ex) + |
124 | (block - le32_to_cpu(ex->ee_block))); | ||
173 | 125 | ||
174 | /* it looks like index is empty; | 126 | /* it looks like index is empty; |
175 | * try to find starting block from index itself */ | 127 | * try to find starting block from index itself */ |
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
354 | 306 | ||
355 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 307 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
356 | { | 308 | { |
357 | ext4_fsblk_t block = ext_pblock(ext); | 309 | ext4_fsblk_t block = ext4_ext_pblock(ext); |
358 | int len = ext4_ext_get_actual_len(ext); | 310 | int len = ext4_ext_get_actual_len(ext); |
359 | 311 | ||
360 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); | 312 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); |
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
363 | static int ext4_valid_extent_idx(struct inode *inode, | 315 | static int ext4_valid_extent_idx(struct inode *inode, |
364 | struct ext4_extent_idx *ext_idx) | 316 | struct ext4_extent_idx *ext_idx) |
365 | { | 317 | { |
366 | ext4_fsblk_t block = idx_pblock(ext_idx); | 318 | ext4_fsblk_t block = ext4_idx_pblock(ext_idx); |
367 | 319 | ||
368 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); | 320 | return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); |
369 | } | 321 | } |
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
463 | for (k = 0; k <= l; k++, path++) { | 415 | for (k = 0; k <= l; k++, path++) { |
464 | if (path->p_idx) { | 416 | if (path->p_idx) { |
465 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 417 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
466 | idx_pblock(path->p_idx)); | 418 | ext4_idx_pblock(path->p_idx)); |
467 | } else if (path->p_ext) { | 419 | } else if (path->p_ext) { |
468 | ext_debug(" %d:[%d]%d:%llu ", | 420 | ext_debug(" %d:[%d]%d:%llu ", |
469 | le32_to_cpu(path->p_ext->ee_block), | 421 | le32_to_cpu(path->p_ext->ee_block), |
470 | ext4_ext_is_uninitialized(path->p_ext), | 422 | ext4_ext_is_uninitialized(path->p_ext), |
471 | ext4_ext_get_actual_len(path->p_ext), | 423 | ext4_ext_get_actual_len(path->p_ext), |
472 | ext_pblock(path->p_ext)); | 424 | ext4_ext_pblock(path->p_ext)); |
473 | } else | 425 | } else |
474 | ext_debug(" []"); | 426 | ext_debug(" []"); |
475 | } | 427 | } |
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
494 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 446 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
495 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), | 447 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
496 | ext4_ext_is_uninitialized(ex), | 448 | ext4_ext_is_uninitialized(ex), |
497 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 449 | ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); |
498 | } | 450 | } |
499 | ext_debug("\n"); | 451 | ext_debug("\n"); |
500 | } | 452 | } |
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
545 | 497 | ||
546 | path->p_idx = l - 1; | 498 | path->p_idx = l - 1; |
547 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), | 499 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), |
548 | idx_pblock(path->p_idx)); | 500 | ext4_idx_pblock(path->p_idx)); |
549 | 501 | ||
550 | #ifdef CHECK_BINSEARCH | 502 | #ifdef CHECK_BINSEARCH |
551 | { | 503 | { |
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode, | |||
614 | path->p_ext = l - 1; | 566 | path->p_ext = l - 1; |
615 | ext_debug(" -> %d:%llu:[%d]%d ", | 567 | ext_debug(" -> %d:%llu:[%d]%d ", |
616 | le32_to_cpu(path->p_ext->ee_block), | 568 | le32_to_cpu(path->p_ext->ee_block), |
617 | ext_pblock(path->p_ext), | 569 | ext4_ext_pblock(path->p_ext), |
618 | ext4_ext_is_uninitialized(path->p_ext), | 570 | ext4_ext_is_uninitialized(path->p_ext), |
619 | ext4_ext_get_actual_len(path->p_ext)); | 571 | ext4_ext_get_actual_len(path->p_ext)); |
620 | 572 | ||
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
682 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); | 634 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); |
683 | 635 | ||
684 | ext4_ext_binsearch_idx(inode, path + ppos, block); | 636 | ext4_ext_binsearch_idx(inode, path + ppos, block); |
685 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 637 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
686 | path[ppos].p_depth = i; | 638 | path[ppos].p_depth = i; |
687 | path[ppos].p_ext = NULL; | 639 | path[ppos].p_ext = NULL; |
688 | 640 | ||
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
721 | ext4_ext_binsearch(inode, path + ppos, block); | 673 | ext4_ext_binsearch(inode, path + ppos, block); |
722 | /* if not an empty leaf */ | 674 | /* if not an empty leaf */ |
723 | if (path[ppos].p_ext) | 675 | if (path[ppos].p_ext) |
724 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 676 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
725 | 677 | ||
726 | ext4_ext_show_path(inode, path); | 678 | ext4_ext_show_path(inode, path); |
727 | 679 | ||
@@ -739,9 +691,9 @@ err: | |||
739 | * insert new index [@logical;@ptr] into the block at @curp; | 691 | * insert new index [@logical;@ptr] into the block at @curp; |
740 | * check where to insert: before @curp or after @curp | 692 | * check where to insert: before @curp or after @curp |
741 | */ | 693 | */ |
742 | int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | 694 | static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, |
743 | struct ext4_ext_path *curp, | 695 | struct ext4_ext_path *curp, |
744 | int logical, ext4_fsblk_t ptr) | 696 | int logical, ext4_fsblk_t ptr) |
745 | { | 697 | { |
746 | struct ext4_extent_idx *ix; | 698 | struct ext4_extent_idx *ix; |
747 | int len, err; | 699 | int len, err; |
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
917 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | 869 | EXT_MAX_EXTENT(path[depth].p_hdr)) { |
918 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", | 870 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", |
919 | le32_to_cpu(path[depth].p_ext->ee_block), | 871 | le32_to_cpu(path[depth].p_ext->ee_block), |
920 | ext_pblock(path[depth].p_ext), | 872 | ext4_ext_pblock(path[depth].p_ext), |
921 | ext4_ext_is_uninitialized(path[depth].p_ext), | 873 | ext4_ext_is_uninitialized(path[depth].p_ext), |
922 | ext4_ext_get_actual_len(path[depth].p_ext), | 874 | ext4_ext_get_actual_len(path[depth].p_ext), |
923 | newblock); | 875 | newblock); |
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
1007 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { | 959 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { |
1008 | ext_debug("%d: move %d:%llu in new index %llu\n", i, | 960 | ext_debug("%d: move %d:%llu in new index %llu\n", i, |
1009 | le32_to_cpu(path[i].p_idx->ei_block), | 961 | le32_to_cpu(path[i].p_idx->ei_block), |
1010 | idx_pblock(path[i].p_idx), | 962 | ext4_idx_pblock(path[i].p_idx), |
1011 | newblock); | 963 | newblock); |
1012 | /*memmove(++fidx, path[i].p_idx++, | 964 | /*memmove(++fidx, path[i].p_idx++, |
1013 | sizeof(struct ext4_extent_idx)); | 965 | sizeof(struct ext4_extent_idx)); |
@@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1146 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", | 1098 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", |
1147 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), | 1099 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), |
1148 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), | 1100 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1149 | idx_pblock(EXT_FIRST_INDEX(neh))); | 1101 | ext4_idx_pblock(EXT_FIRST_INDEX(neh))); |
1150 | 1102 | ||
1151 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); | 1103 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); |
1152 | err = ext4_ext_dirty(handle, inode, curp); | 1104 | err = ext4_ext_dirty(handle, inode, curp); |
@@ -1232,9 +1184,9 @@ out: | |||
1232 | * returns 0 at @phys | 1184 | * returns 0 at @phys |
1233 | * return value contains 0 (success) or error code | 1185 | * return value contains 0 (success) or error code |
1234 | */ | 1186 | */ |
1235 | int | 1187 | static int ext4_ext_search_left(struct inode *inode, |
1236 | ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | 1188 | struct ext4_ext_path *path, |
1237 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1189 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1238 | { | 1190 | { |
1239 | struct ext4_extent_idx *ix; | 1191 | struct ext4_extent_idx *ix; |
1240 | struct ext4_extent *ex; | 1192 | struct ext4_extent *ex; |
@@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1286 | } | 1238 | } |
1287 | 1239 | ||
1288 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; | 1240 | *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; |
1289 | *phys = ext_pblock(ex) + ee_len - 1; | 1241 | *phys = ext4_ext_pblock(ex) + ee_len - 1; |
1290 | return 0; | 1242 | return 0; |
1291 | } | 1243 | } |
1292 | 1244 | ||
@@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, | |||
1297 | * returns 0 at @phys | 1249 | * returns 0 at @phys |
1298 | * return value contains 0 (success) or error code | 1250 | * return value contains 0 (success) or error code |
1299 | */ | 1251 | */ |
1300 | int | 1252 | static int ext4_ext_search_right(struct inode *inode, |
1301 | ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | 1253 | struct ext4_ext_path *path, |
1302 | ext4_lblk_t *logical, ext4_fsblk_t *phys) | 1254 | ext4_lblk_t *logical, ext4_fsblk_t *phys) |
1303 | { | 1255 | { |
1304 | struct buffer_head *bh = NULL; | 1256 | struct buffer_head *bh = NULL; |
1305 | struct ext4_extent_header *eh; | 1257 | struct ext4_extent_header *eh; |
@@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1342 | } | 1294 | } |
1343 | } | 1295 | } |
1344 | *logical = le32_to_cpu(ex->ee_block); | 1296 | *logical = le32_to_cpu(ex->ee_block); |
1345 | *phys = ext_pblock(ex); | 1297 | *phys = ext4_ext_pblock(ex); |
1346 | return 0; | 1298 | return 0; |
1347 | } | 1299 | } |
1348 | 1300 | ||
@@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, | |||
1357 | /* next allocated block in this leaf */ | 1309 | /* next allocated block in this leaf */ |
1358 | ex++; | 1310 | ex++; |
1359 | *logical = le32_to_cpu(ex->ee_block); | 1311 | *logical = le32_to_cpu(ex->ee_block); |
1360 | *phys = ext_pblock(ex); | 1312 | *phys = ext4_ext_pblock(ex); |
1361 | return 0; | 1313 | return 0; |
1362 | } | 1314 | } |
1363 | 1315 | ||
@@ -1376,7 +1328,7 @@ got_index: | |||
1376 | * follow it and find the closest allocated | 1328 | * follow it and find the closest allocated |
1377 | * block to the right */ | 1329 | * block to the right */ |
1378 | ix++; | 1330 | ix++; |
1379 | block = idx_pblock(ix); | 1331 | block = ext4_idx_pblock(ix); |
1380 | while (++depth < path->p_depth) { | 1332 | while (++depth < path->p_depth) { |
1381 | bh = sb_bread(inode->i_sb, block); | 1333 | bh = sb_bread(inode->i_sb, block); |
1382 | if (bh == NULL) | 1334 | if (bh == NULL) |
@@ -1388,7 +1340,7 @@ got_index: | |||
1388 | return -EIO; | 1340 | return -EIO; |
1389 | } | 1341 | } |
1390 | ix = EXT_FIRST_INDEX(eh); | 1342 | ix = EXT_FIRST_INDEX(eh); |
1391 | block = idx_pblock(ix); | 1343 | block = ext4_idx_pblock(ix); |
1392 | put_bh(bh); | 1344 | put_bh(bh); |
1393 | } | 1345 | } |
1394 | 1346 | ||
@@ -1402,7 +1354,7 @@ got_index: | |||
1402 | } | 1354 | } |
1403 | ex = EXT_FIRST_EXTENT(eh); | 1355 | ex = EXT_FIRST_EXTENT(eh); |
1404 | *logical = le32_to_cpu(ex->ee_block); | 1356 | *logical = le32_to_cpu(ex->ee_block); |
1405 | *phys = ext_pblock(ex); | 1357 | *phys = ext4_ext_pblock(ex); |
1406 | put_bh(bh); | 1358 | put_bh(bh); |
1407 | return 0; | 1359 | return 0; |
1408 | } | 1360 | } |
@@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1573 | return 0; | 1525 | return 0; |
1574 | #endif | 1526 | #endif |
1575 | 1527 | ||
1576 | if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) | 1528 | if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) |
1577 | return 1; | 1529 | return 1; |
1578 | return 0; | 1530 | return 0; |
1579 | } | 1531 | } |
@@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1585 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns | 1537 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns |
1586 | * 1 if they got merged. | 1538 | * 1 if they got merged. |
1587 | */ | 1539 | */ |
1588 | int ext4_ext_try_to_merge(struct inode *inode, | 1540 | static int ext4_ext_try_to_merge(struct inode *inode, |
1589 | struct ext4_ext_path *path, | 1541 | struct ext4_ext_path *path, |
1590 | struct ext4_extent *ex) | 1542 | struct ext4_extent *ex) |
1591 | { | 1543 | { |
1592 | struct ext4_extent_header *eh; | 1544 | struct ext4_extent_header *eh; |
1593 | unsigned int depth, len; | 1545 | unsigned int depth, len; |
@@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1632 | * such that there will be no overlap, and then returns 1. | 1584 | * such that there will be no overlap, and then returns 1. |
1633 | * If there is no overlap found, it returns 0. | 1585 | * If there is no overlap found, it returns 0. |
1634 | */ | 1586 | */ |
1635 | unsigned int ext4_ext_check_overlap(struct inode *inode, | 1587 | static unsigned int ext4_ext_check_overlap(struct inode *inode, |
1636 | struct ext4_extent *newext, | 1588 | struct ext4_extent *newext, |
1637 | struct ext4_ext_path *path) | 1589 | struct ext4_ext_path *path) |
1638 | { | 1590 | { |
1639 | ext4_lblk_t b1, b2; | 1591 | ext4_lblk_t b1, b2; |
1640 | unsigned int depth, len1; | 1592 | unsigned int depth, len1; |
@@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1706 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) | 1658 | if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) |
1707 | && ext4_can_extents_be_merged(inode, ex, newext)) { | 1659 | && ext4_can_extents_be_merged(inode, ex, newext)) { |
1708 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", | 1660 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1709 | ext4_ext_is_uninitialized(newext), | 1661 | ext4_ext_is_uninitialized(newext), |
1710 | ext4_ext_get_actual_len(newext), | 1662 | ext4_ext_get_actual_len(newext), |
1711 | le32_to_cpu(ex->ee_block), | 1663 | le32_to_cpu(ex->ee_block), |
1712 | ext4_ext_is_uninitialized(ex), | 1664 | ext4_ext_is_uninitialized(ex), |
1713 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1665 | ext4_ext_get_actual_len(ex), |
1666 | ext4_ext_pblock(ex)); | ||
1714 | err = ext4_ext_get_access(handle, inode, path + depth); | 1667 | err = ext4_ext_get_access(handle, inode, path + depth); |
1715 | if (err) | 1668 | if (err) |
1716 | return err; | 1669 | return err; |
@@ -1780,7 +1733,7 @@ has_space: | |||
1780 | /* there is no extent in this leaf, create first one */ | 1733 | /* there is no extent in this leaf, create first one */ |
1781 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", | 1734 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
1782 | le32_to_cpu(newext->ee_block), | 1735 | le32_to_cpu(newext->ee_block), |
1783 | ext_pblock(newext), | 1736 | ext4_ext_pblock(newext), |
1784 | ext4_ext_is_uninitialized(newext), | 1737 | ext4_ext_is_uninitialized(newext), |
1785 | ext4_ext_get_actual_len(newext)); | 1738 | ext4_ext_get_actual_len(newext)); |
1786 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1739 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
@@ -1794,7 +1747,7 @@ has_space: | |||
1794 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " | 1747 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
1795 | "move %d from 0x%p to 0x%p\n", | 1748 | "move %d from 0x%p to 0x%p\n", |
1796 | le32_to_cpu(newext->ee_block), | 1749 | le32_to_cpu(newext->ee_block), |
1797 | ext_pblock(newext), | 1750 | ext4_ext_pblock(newext), |
1798 | ext4_ext_is_uninitialized(newext), | 1751 | ext4_ext_is_uninitialized(newext), |
1799 | ext4_ext_get_actual_len(newext), | 1752 | ext4_ext_get_actual_len(newext), |
1800 | nearex, len, nearex + 1, nearex + 2); | 1753 | nearex, len, nearex + 1, nearex + 2); |
@@ -1808,7 +1761,7 @@ has_space: | |||
1808 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " | 1761 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
1809 | "move %d from 0x%p to 0x%p\n", | 1762 | "move %d from 0x%p to 0x%p\n", |
1810 | le32_to_cpu(newext->ee_block), | 1763 | le32_to_cpu(newext->ee_block), |
1811 | ext_pblock(newext), | 1764 | ext4_ext_pblock(newext), |
1812 | ext4_ext_is_uninitialized(newext), | 1765 | ext4_ext_is_uninitialized(newext), |
1813 | ext4_ext_get_actual_len(newext), | 1766 | ext4_ext_get_actual_len(newext), |
1814 | nearex, len, nearex + 1, nearex + 2); | 1767 | nearex, len, nearex + 1, nearex + 2); |
@@ -1819,7 +1772,7 @@ has_space: | |||
1819 | le16_add_cpu(&eh->eh_entries, 1); | 1772 | le16_add_cpu(&eh->eh_entries, 1); |
1820 | nearex = path[depth].p_ext; | 1773 | nearex = path[depth].p_ext; |
1821 | nearex->ee_block = newext->ee_block; | 1774 | nearex->ee_block = newext->ee_block; |
1822 | ext4_ext_store_pblock(nearex, ext_pblock(newext)); | 1775 | ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); |
1823 | nearex->ee_len = newext->ee_len; | 1776 | nearex->ee_len = newext->ee_len; |
1824 | 1777 | ||
1825 | merge: | 1778 | merge: |
@@ -1845,9 +1798,9 @@ cleanup: | |||
1845 | return err; | 1798 | return err; |
1846 | } | 1799 | } |
1847 | 1800 | ||
1848 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | 1801 | static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, |
1849 | ext4_lblk_t num, ext_prepare_callback func, | 1802 | ext4_lblk_t num, ext_prepare_callback func, |
1850 | void *cbdata) | 1803 | void *cbdata) |
1851 | { | 1804 | { |
1852 | struct ext4_ext_path *path = NULL; | 1805 | struct ext4_ext_path *path = NULL; |
1853 | struct ext4_ext_cache cbex; | 1806 | struct ext4_ext_cache cbex; |
@@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1923 | } else { | 1876 | } else { |
1924 | cbex.ec_block = le32_to_cpu(ex->ee_block); | 1877 | cbex.ec_block = le32_to_cpu(ex->ee_block); |
1925 | cbex.ec_len = ext4_ext_get_actual_len(ex); | 1878 | cbex.ec_len = ext4_ext_get_actual_len(ex); |
1926 | cbex.ec_start = ext_pblock(ex); | 1879 | cbex.ec_start = ext4_ext_pblock(ex); |
1927 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | 1880 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; |
1928 | } | 1881 | } |
1929 | 1882 | ||
@@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2073 | 2026 | ||
2074 | /* free index block */ | 2027 | /* free index block */ |
2075 | path--; | 2028 | path--; |
2076 | leaf = idx_pblock(path->p_idx); | 2029 | leaf = ext4_idx_pblock(path->p_idx); |
2077 | if (unlikely(path->p_hdr->eh_entries == 0)) { | 2030 | if (unlikely(path->p_hdr->eh_entries == 0)) { |
2078 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); | 2031 | EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); |
2079 | return -EIO; | 2032 | return -EIO; |
@@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2181 | ext4_fsblk_t start; | 2134 | ext4_fsblk_t start; |
2182 | 2135 | ||
2183 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2136 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2184 | start = ext_pblock(ex) + ee_len - num; | 2137 | start = ext4_ext_pblock(ex) + ee_len - num; |
2185 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2138 | ext_debug("free last %u blocks starting %llu\n", num, start); |
2186 | ext4_free_blocks(handle, inode, 0, start, num, flags); | 2139 | ext4_free_blocks(handle, inode, 0, start, num, flags); |
2187 | } else if (from == le32_to_cpu(ex->ee_block) | 2140 | } else if (from == le32_to_cpu(ex->ee_block) |
@@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2310 | goto out; | 2263 | goto out; |
2311 | 2264 | ||
2312 | ext_debug("new extent: %u:%u:%llu\n", block, num, | 2265 | ext_debug("new extent: %u:%u:%llu\n", block, num, |
2313 | ext_pblock(ex)); | 2266 | ext4_ext_pblock(ex)); |
2314 | ex--; | 2267 | ex--; |
2315 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2268 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2316 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2269 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2421,9 +2374,9 @@ again: | |||
2421 | struct buffer_head *bh; | 2374 | struct buffer_head *bh; |
2422 | /* go to the next level */ | 2375 | /* go to the next level */ |
2423 | ext_debug("move to level %d (block %llu)\n", | 2376 | ext_debug("move to level %d (block %llu)\n", |
2424 | i + 1, idx_pblock(path[i].p_idx)); | 2377 | i + 1, ext4_idx_pblock(path[i].p_idx)); |
2425 | memset(path + i + 1, 0, sizeof(*path)); | 2378 | memset(path + i + 1, 0, sizeof(*path)); |
2426 | bh = sb_bread(sb, idx_pblock(path[i].p_idx)); | 2379 | bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); |
2427 | if (!bh) { | 2380 | if (!bh) { |
2428 | /* should we reset i_size? */ | 2381 | /* should we reset i_size? */ |
2429 | err = -EIO; | 2382 | err = -EIO; |
@@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb) | |||
2535 | #endif | 2488 | #endif |
2536 | } | 2489 | } |
2537 | 2490 | ||
2538 | static void bi_complete(struct bio *bio, int error) | ||
2539 | { | ||
2540 | complete((struct completion *)bio->bi_private); | ||
2541 | } | ||
2542 | |||
2543 | /* FIXME!! we need to try to merge to left or right after zero-out */ | 2491 | /* FIXME!! we need to try to merge to left or right after zero-out */ |
2544 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | 2492 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) |
2545 | { | 2493 | { |
2494 | ext4_fsblk_t ee_pblock; | ||
2495 | unsigned int ee_len; | ||
2546 | int ret; | 2496 | int ret; |
2547 | struct bio *bio; | ||
2548 | int blkbits, blocksize; | ||
2549 | sector_t ee_pblock; | ||
2550 | struct completion event; | ||
2551 | unsigned int ee_len, len, done, offset; | ||
2552 | 2497 | ||
2553 | |||
2554 | blkbits = inode->i_blkbits; | ||
2555 | blocksize = inode->i_sb->s_blocksize; | ||
2556 | ee_len = ext4_ext_get_actual_len(ex); | 2498 | ee_len = ext4_ext_get_actual_len(ex); |
2557 | ee_pblock = ext_pblock(ex); | 2499 | ee_pblock = ext4_ext_pblock(ex); |
2558 | |||
2559 | /* convert ee_pblock to 512 byte sectors */ | ||
2560 | ee_pblock = ee_pblock << (blkbits - 9); | ||
2561 | |||
2562 | while (ee_len > 0) { | ||
2563 | |||
2564 | if (ee_len > BIO_MAX_PAGES) | ||
2565 | len = BIO_MAX_PAGES; | ||
2566 | else | ||
2567 | len = ee_len; | ||
2568 | |||
2569 | bio = bio_alloc(GFP_NOIO, len); | ||
2570 | if (!bio) | ||
2571 | return -ENOMEM; | ||
2572 | |||
2573 | bio->bi_sector = ee_pblock; | ||
2574 | bio->bi_bdev = inode->i_sb->s_bdev; | ||
2575 | |||
2576 | done = 0; | ||
2577 | offset = 0; | ||
2578 | while (done < len) { | ||
2579 | ret = bio_add_page(bio, ZERO_PAGE(0), | ||
2580 | blocksize, offset); | ||
2581 | if (ret != blocksize) { | ||
2582 | /* | ||
2583 | * We can't add any more pages because of | ||
2584 | * hardware limitations. Start a new bio. | ||
2585 | */ | ||
2586 | break; | ||
2587 | } | ||
2588 | done++; | ||
2589 | offset += blocksize; | ||
2590 | if (offset >= PAGE_CACHE_SIZE) | ||
2591 | offset = 0; | ||
2592 | } | ||
2593 | 2500 | ||
2594 | init_completion(&event); | 2501 | ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); |
2595 | bio->bi_private = &event; | 2502 | if (ret > 0) |
2596 | bio->bi_end_io = bi_complete; | 2503 | ret = 0; |
2597 | submit_bio(WRITE, bio); | ||
2598 | wait_for_completion(&event); | ||
2599 | 2504 | ||
2600 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 2505 | return ret; |
2601 | bio_put(bio); | ||
2602 | return -EIO; | ||
2603 | } | ||
2604 | bio_put(bio); | ||
2605 | ee_len -= done; | ||
2606 | ee_pblock += done << (blkbits - 9); | ||
2607 | } | ||
2608 | return 0; | ||
2609 | } | 2506 | } |
2610 | 2507 | ||
2611 | #define EXT4_EXT_ZERO_LEN 7 | 2508 | #define EXT4_EXT_ZERO_LEN 7 |
@@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2651 | ee_block = le32_to_cpu(ex->ee_block); | 2548 | ee_block = le32_to_cpu(ex->ee_block); |
2652 | ee_len = ext4_ext_get_actual_len(ex); | 2549 | ee_len = ext4_ext_get_actual_len(ex); |
2653 | allocated = ee_len - (map->m_lblk - ee_block); | 2550 | allocated = ee_len - (map->m_lblk - ee_block); |
2654 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | 2551 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); |
2655 | 2552 | ||
2656 | ex2 = ex; | 2553 | ex2 = ex; |
2657 | orig_ex.ee_block = ex->ee_block; | 2554 | orig_ex.ee_block = ex->ee_block; |
2658 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2555 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2659 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2556 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); |
2660 | 2557 | ||
2661 | /* | 2558 | /* |
2662 | * It is safe to convert extent to initialized via explicit | 2559 | * It is safe to convert extent to initialized via explicit |
@@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2675 | /* update the extent length and mark as initialized */ | 2572 | /* update the extent length and mark as initialized */ |
2676 | ex->ee_block = orig_ex.ee_block; | 2573 | ex->ee_block = orig_ex.ee_block; |
2677 | ex->ee_len = orig_ex.ee_len; | 2574 | ex->ee_len = orig_ex.ee_len; |
2678 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2575 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2679 | ext4_ext_dirty(handle, inode, path + depth); | 2576 | ext4_ext_dirty(handle, inode, path + depth); |
2680 | /* zeroed the full extent */ | 2577 | /* zeroed the full extent */ |
2681 | return allocated; | 2578 | return allocated; |
@@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2710 | ex->ee_block = orig_ex.ee_block; | 2607 | ex->ee_block = orig_ex.ee_block; |
2711 | ex->ee_len = cpu_to_le16(ee_len - allocated); | 2608 | ex->ee_len = cpu_to_le16(ee_len - allocated); |
2712 | ext4_ext_mark_uninitialized(ex); | 2609 | ext4_ext_mark_uninitialized(ex); |
2713 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2610 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2714 | ext4_ext_dirty(handle, inode, path + depth); | 2611 | ext4_ext_dirty(handle, inode, path + depth); |
2715 | 2612 | ||
2716 | ex3 = &newex; | 2613 | ex3 = &newex; |
@@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2725 | goto fix_extent_len; | 2622 | goto fix_extent_len; |
2726 | ex->ee_block = orig_ex.ee_block; | 2623 | ex->ee_block = orig_ex.ee_block; |
2727 | ex->ee_len = orig_ex.ee_len; | 2624 | ex->ee_len = orig_ex.ee_len; |
2728 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2625 | ext4_ext_store_pblock(ex, |
2626 | ext4_ext_pblock(&orig_ex)); | ||
2729 | ext4_ext_dirty(handle, inode, path + depth); | 2627 | ext4_ext_dirty(handle, inode, path + depth); |
2730 | /* blocks available from map->m_lblk */ | 2628 | /* blocks available from map->m_lblk */ |
2731 | return allocated; | 2629 | return allocated; |
@@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2782 | /* update the extent length and mark as initialized */ | 2680 | /* update the extent length and mark as initialized */ |
2783 | ex->ee_block = orig_ex.ee_block; | 2681 | ex->ee_block = orig_ex.ee_block; |
2784 | ex->ee_len = orig_ex.ee_len; | 2682 | ex->ee_len = orig_ex.ee_len; |
2785 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2683 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2786 | ext4_ext_dirty(handle, inode, path + depth); | 2684 | ext4_ext_dirty(handle, inode, path + depth); |
2787 | /* zeroed the full extent */ | 2685 | /* zeroed the full extent */ |
2788 | /* blocks available from map->m_lblk */ | 2686 | /* blocks available from map->m_lblk */ |
@@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2833 | /* update the extent length and mark as initialized */ | 2731 | /* update the extent length and mark as initialized */ |
2834 | ex->ee_block = orig_ex.ee_block; | 2732 | ex->ee_block = orig_ex.ee_block; |
2835 | ex->ee_len = orig_ex.ee_len; | 2733 | ex->ee_len = orig_ex.ee_len; |
2836 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2734 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2837 | ext4_ext_dirty(handle, inode, path + depth); | 2735 | ext4_ext_dirty(handle, inode, path + depth); |
2838 | /* zero out the first half */ | 2736 | /* zero out the first half */ |
2839 | /* blocks available from map->m_lblk */ | 2737 | /* blocks available from map->m_lblk */ |
@@ -2902,7 +2800,7 @@ insert: | |||
2902 | /* update the extent length and mark as initialized */ | 2800 | /* update the extent length and mark as initialized */ |
2903 | ex->ee_block = orig_ex.ee_block; | 2801 | ex->ee_block = orig_ex.ee_block; |
2904 | ex->ee_len = orig_ex.ee_len; | 2802 | ex->ee_len = orig_ex.ee_len; |
2905 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2803 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2906 | ext4_ext_dirty(handle, inode, path + depth); | 2804 | ext4_ext_dirty(handle, inode, path + depth); |
2907 | /* zero out the first half */ | 2805 | /* zero out the first half */ |
2908 | return allocated; | 2806 | return allocated; |
@@ -2915,7 +2813,7 @@ out: | |||
2915 | fix_extent_len: | 2813 | fix_extent_len: |
2916 | ex->ee_block = orig_ex.ee_block; | 2814 | ex->ee_block = orig_ex.ee_block; |
2917 | ex->ee_len = orig_ex.ee_len; | 2815 | ex->ee_len = orig_ex.ee_len; |
2918 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2816 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
2919 | ext4_ext_mark_uninitialized(ex); | 2817 | ext4_ext_mark_uninitialized(ex); |
2920 | ext4_ext_dirty(handle, inode, path + depth); | 2818 | ext4_ext_dirty(handle, inode, path + depth); |
2921 | return err; | 2819 | return err; |
@@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2973 | ee_block = le32_to_cpu(ex->ee_block); | 2871 | ee_block = le32_to_cpu(ex->ee_block); |
2974 | ee_len = ext4_ext_get_actual_len(ex); | 2872 | ee_len = ext4_ext_get_actual_len(ex); |
2975 | allocated = ee_len - (map->m_lblk - ee_block); | 2873 | allocated = ee_len - (map->m_lblk - ee_block); |
2976 | newblock = map->m_lblk - ee_block + ext_pblock(ex); | 2874 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); |
2977 | 2875 | ||
2978 | ex2 = ex; | 2876 | ex2 = ex; |
2979 | orig_ex.ee_block = ex->ee_block; | 2877 | orig_ex.ee_block = ex->ee_block; |
2980 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2878 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2981 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2879 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); |
2982 | 2880 | ||
2983 | /* | 2881 | /* |
2984 | * It is safe to convert extent to initialized via explicit | 2882 | * It is safe to convert extent to initialized via explicit |
@@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3027 | /* update the extent length and mark as initialized */ | 2925 | /* update the extent length and mark as initialized */ |
3028 | ex->ee_block = orig_ex.ee_block; | 2926 | ex->ee_block = orig_ex.ee_block; |
3029 | ex->ee_len = orig_ex.ee_len; | 2927 | ex->ee_len = orig_ex.ee_len; |
3030 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2928 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3031 | ext4_ext_dirty(handle, inode, path + depth); | 2929 | ext4_ext_dirty(handle, inode, path + depth); |
3032 | /* zeroed the full extent */ | 2930 | /* zeroed the full extent */ |
3033 | /* blocks available from map->m_lblk */ | 2931 | /* blocks available from map->m_lblk */ |
@@ -3099,7 +2997,7 @@ insert: | |||
3099 | /* update the extent length and mark as initialized */ | 2997 | /* update the extent length and mark as initialized */ |
3100 | ex->ee_block = orig_ex.ee_block; | 2998 | ex->ee_block = orig_ex.ee_block; |
3101 | ex->ee_len = orig_ex.ee_len; | 2999 | ex->ee_len = orig_ex.ee_len; |
3102 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3000 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3103 | ext4_ext_dirty(handle, inode, path + depth); | 3001 | ext4_ext_dirty(handle, inode, path + depth); |
3104 | /* zero out the first half */ | 3002 | /* zero out the first half */ |
3105 | return allocated; | 3003 | return allocated; |
@@ -3112,7 +3010,7 @@ out: | |||
3112 | fix_extent_len: | 3010 | fix_extent_len: |
3113 | ex->ee_block = orig_ex.ee_block; | 3011 | ex->ee_block = orig_ex.ee_block; |
3114 | ex->ee_len = orig_ex.ee_len; | 3012 | ex->ee_len = orig_ex.ee_len; |
3115 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3013 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); |
3116 | ext4_ext_mark_uninitialized(ex); | 3014 | ext4_ext_mark_uninitialized(ex); |
3117 | ext4_ext_dirty(handle, inode, path + depth); | 3015 | ext4_ext_dirty(handle, inode, path + depth); |
3118 | return err; | 3016 | return err; |
@@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, | |||
3180 | unmap_underlying_metadata(bdev, block + i); | 3078 | unmap_underlying_metadata(bdev, block + i); |
3181 | } | 3079 | } |
3182 | 3080 | ||
3081 | /* | ||
3082 | * Handle EOFBLOCKS_FL flag, clearing it if necessary | ||
3083 | */ | ||
3084 | static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | ||
3085 | struct ext4_map_blocks *map, | ||
3086 | struct ext4_ext_path *path, | ||
3087 | unsigned int len) | ||
3088 | { | ||
3089 | int i, depth; | ||
3090 | struct ext4_extent_header *eh; | ||
3091 | struct ext4_extent *ex, *last_ex; | ||
3092 | |||
3093 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
3094 | return 0; | ||
3095 | |||
3096 | depth = ext_depth(inode); | ||
3097 | eh = path[depth].p_hdr; | ||
3098 | ex = path[depth].p_ext; | ||
3099 | |||
3100 | if (unlikely(!eh->eh_entries)) { | ||
3101 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " | ||
3102 | "EOFBLOCKS_FL set"); | ||
3103 | return -EIO; | ||
3104 | } | ||
3105 | last_ex = EXT_LAST_EXTENT(eh); | ||
3106 | /* | ||
3107 | * We should clear the EOFBLOCKS_FL flag if we are writing the | ||
3108 | * last block in the last extent in the file. We test this by | ||
3109 | * first checking to see if the caller of | ||
3110 | * ext4_ext_map_blocks() was interested in the last block (or | ||
3111 | * a block beyond the last block) in the current extent. If | ||
3112 | * this turns out to be false, we can bail out from this | ||
3113 | * function immediately. | ||
3114 | */ | ||
3115 | if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) + | ||
3116 | ext4_ext_get_actual_len(last_ex)) | ||
3117 | return 0; | ||
3118 | /* | ||
3119 | * If the caller does appear to be planning to write at or | ||
3120 | * beyond the end of the current extent, we then test to see | ||
3121 | * if the current extent is the last extent in the file, by | ||
3122 | * checking to make sure it was reached via the rightmost node | ||
3123 | * at each level of the tree. | ||
3124 | */ | ||
3125 | for (i = depth-1; i >= 0; i--) | ||
3126 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3127 | return 0; | ||
3128 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3129 | return ext4_mark_inode_dirty(handle, inode); | ||
3130 | } | ||
3131 | |||
3183 | static int | 3132 | static int |
3184 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3133 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3185 | struct ext4_map_blocks *map, | 3134 | struct ext4_map_blocks *map, |
@@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3206 | * completed | 3155 | * completed |
3207 | */ | 3156 | */ |
3208 | if (io) | 3157 | if (io) |
3209 | io->flag = EXT4_IO_UNWRITTEN; | 3158 | io->flag = EXT4_IO_END_UNWRITTEN; |
3210 | else | 3159 | else |
3211 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3160 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3212 | if (ext4_should_dioread_nolock(inode)) | 3161 | if (ext4_should_dioread_nolock(inode)) |
@@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3217 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { | 3166 | if ((flags & EXT4_GET_BLOCKS_CONVERT)) { |
3218 | ret = ext4_convert_unwritten_extents_endio(handle, inode, | 3167 | ret = ext4_convert_unwritten_extents_endio(handle, inode, |
3219 | path); | 3168 | path); |
3220 | if (ret >= 0) | 3169 | if (ret >= 0) { |
3221 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3170 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3171 | err = check_eofblocks_fl(handle, inode, map, path, | ||
3172 | map->m_len); | ||
3173 | } else | ||
3174 | err = ret; | ||
3222 | goto out2; | 3175 | goto out2; |
3223 | } | 3176 | } |
3224 | /* buffered IO case */ | 3177 | /* buffered IO case */ |
@@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3244 | 3197 | ||
3245 | /* buffered write, writepage time, convert*/ | 3198 | /* buffered write, writepage time, convert*/ |
3246 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); | 3199 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
3247 | if (ret >= 0) | 3200 | if (ret >= 0) { |
3248 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3201 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3202 | err = check_eofblocks_fl(handle, inode, map, path, map->m_len); | ||
3203 | if (err < 0) | ||
3204 | goto out2; | ||
3205 | } | ||
3206 | |||
3249 | out: | 3207 | out: |
3250 | if (ret <= 0) { | 3208 | if (ret <= 0) { |
3251 | err = ret; | 3209 | err = ret; |
@@ -3292,6 +3250,7 @@ out2: | |||
3292 | } | 3250 | } |
3293 | return err ? err : allocated; | 3251 | return err ? err : allocated; |
3294 | } | 3252 | } |
3253 | |||
3295 | /* | 3254 | /* |
3296 | * Block allocation/map/preallocation routine for extents based files | 3255 | * Block allocation/map/preallocation routine for extents based files |
3297 | * | 3256 | * |
@@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3315 | { | 3274 | { |
3316 | struct ext4_ext_path *path = NULL; | 3275 | struct ext4_ext_path *path = NULL; |
3317 | struct ext4_extent_header *eh; | 3276 | struct ext4_extent_header *eh; |
3318 | struct ext4_extent newex, *ex, *last_ex; | 3277 | struct ext4_extent newex, *ex; |
3319 | ext4_fsblk_t newblock; | 3278 | ext4_fsblk_t newblock; |
3320 | int i, err = 0, depth, ret, cache_type; | 3279 | int err = 0, depth, ret, cache_type; |
3321 | unsigned int allocated = 0; | 3280 | unsigned int allocated = 0; |
3322 | struct ext4_allocation_request ar; | 3281 | struct ext4_allocation_request ar; |
3323 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3282 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
@@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3341 | /* block is already allocated */ | 3300 | /* block is already allocated */ |
3342 | newblock = map->m_lblk | 3301 | newblock = map->m_lblk |
3343 | - le32_to_cpu(newex.ee_block) | 3302 | - le32_to_cpu(newex.ee_block) |
3344 | + ext_pblock(&newex); | 3303 | + ext4_ext_pblock(&newex); |
3345 | /* number of remaining blocks in the extent */ | 3304 | /* number of remaining blocks in the extent */ |
3346 | allocated = ext4_ext_get_actual_len(&newex) - | 3305 | allocated = ext4_ext_get_actual_len(&newex) - |
3347 | (map->m_lblk - le32_to_cpu(newex.ee_block)); | 3306 | (map->m_lblk - le32_to_cpu(newex.ee_block)); |
@@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3379 | ex = path[depth].p_ext; | 3338 | ex = path[depth].p_ext; |
3380 | if (ex) { | 3339 | if (ex) { |
3381 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 3340 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
3382 | ext4_fsblk_t ee_start = ext_pblock(ex); | 3341 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
3383 | unsigned short ee_len; | 3342 | unsigned short ee_len; |
3384 | 3343 | ||
3385 | /* | 3344 | /* |
@@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3488 | */ | 3447 | */ |
3489 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3448 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3490 | if (io) | 3449 | if (io) |
3491 | io->flag = EXT4_IO_UNWRITTEN; | 3450 | io->flag = EXT4_IO_END_UNWRITTEN; |
3492 | else | 3451 | else |
3493 | ext4_set_inode_state(inode, | 3452 | ext4_set_inode_state(inode, |
3494 | EXT4_STATE_DIO_UNWRITTEN); | 3453 | EXT4_STATE_DIO_UNWRITTEN); |
@@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3497 | map->m_flags |= EXT4_MAP_UNINIT; | 3456 | map->m_flags |= EXT4_MAP_UNINIT; |
3498 | } | 3457 | } |
3499 | 3458 | ||
3500 | if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { | 3459 | err = check_eofblocks_fl(handle, inode, map, path, ar.len); |
3501 | if (unlikely(!eh->eh_entries)) { | 3460 | if (err) |
3502 | EXT4_ERROR_INODE(inode, | 3461 | goto out2; |
3503 | "eh->eh_entries == 0 and " | 3462 | |
3504 | "EOFBLOCKS_FL set"); | ||
3505 | err = -EIO; | ||
3506 | goto out2; | ||
3507 | } | ||
3508 | last_ex = EXT_LAST_EXTENT(eh); | ||
3509 | /* | ||
3510 | * If the current leaf block was reached by looking at | ||
3511 | * the last index block all the way down the tree, and | ||
3512 | * we are extending the inode beyond the last extent | ||
3513 | * in the current leaf block, then clear the | ||
3514 | * EOFBLOCKS_FL flag. | ||
3515 | */ | ||
3516 | for (i = depth-1; i >= 0; i--) { | ||
3517 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3518 | break; | ||
3519 | } | ||
3520 | if ((i < 0) && | ||
3521 | (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + | ||
3522 | ext4_ext_get_actual_len(last_ex))) | ||
3523 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3524 | } | ||
3525 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3463 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3526 | if (err) { | 3464 | if (err) { |
3527 | /* free data blocks we just allocated */ | 3465 | /* free data blocks we just allocated */ |
3528 | /* not a good idea to call discard here directly, | 3466 | /* not a good idea to call discard here directly, |
3529 | * but otherwise we'd need to call it every free() */ | 3467 | * but otherwise we'd need to call it every free() */ |
3530 | ext4_discard_preallocations(inode); | 3468 | ext4_discard_preallocations(inode); |
3531 | ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), | 3469 | ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), |
3532 | ext4_ext_get_actual_len(&newex), 0); | 3470 | ext4_ext_get_actual_len(&newex), 0); |
3533 | goto out2; | 3471 | goto out2; |
3534 | } | 3472 | } |
3535 | 3473 | ||
3536 | /* previous routine could use block we allocated */ | 3474 | /* previous routine could use block we allocated */ |
3537 | newblock = ext_pblock(&newex); | 3475 | newblock = ext4_ext_pblock(&newex); |
3538 | allocated = ext4_ext_get_actual_len(&newex); | 3476 | allocated = ext4_ext_get_actual_len(&newex); |
3539 | if (allocated > map->m_len) | 3477 | if (allocated > map->m_len) |
3540 | allocated = map->m_len; | 3478 | allocated = map->m_len; |
@@ -3729,7 +3667,7 @@ retry: | |||
3729 | printk(KERN_ERR "%s: ext4_ext_map_blocks " | 3667 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3730 | "returned error inode#%lu, block=%u, " | 3668 | "returned error inode#%lu, block=%u, " |
3731 | "max_blocks=%u", __func__, | 3669 | "max_blocks=%u", __func__, |
3732 | inode->i_ino, block, max_blocks); | 3670 | inode->i_ino, map.m_lblk, max_blocks); |
3733 | #endif | 3671 | #endif |
3734 | ext4_mark_inode_dirty(handle, inode); | 3672 | ext4_mark_inode_dirty(handle, inode); |
3735 | ret2 = ext4_journal_stop(handle); | 3673 | ret2 = ext4_journal_stop(handle); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ee92b66d4558..5a5c55ddceef 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
130 | return dquot_file_open(inode, filp); | 130 | return dquot_file_open(inode, filp); |
131 | } | 131 | } |
132 | 132 | ||
133 | /* | ||
134 | * ext4_llseek() copied from generic_file_llseek() to handle both | ||
135 | * block-mapped and extent-mapped maxbytes values. This should | ||
136 | * otherwise be identical with generic_file_llseek(). | ||
137 | */ | ||
138 | loff_t ext4_llseek(struct file *file, loff_t offset, int origin) | ||
139 | { | ||
140 | struct inode *inode = file->f_mapping->host; | ||
141 | loff_t maxbytes; | ||
142 | |||
143 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | ||
144 | maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; | ||
145 | else | ||
146 | maxbytes = inode->i_sb->s_maxbytes; | ||
147 | mutex_lock(&inode->i_mutex); | ||
148 | switch (origin) { | ||
149 | case SEEK_END: | ||
150 | offset += inode->i_size; | ||
151 | break; | ||
152 | case SEEK_CUR: | ||
153 | if (offset == 0) { | ||
154 | mutex_unlock(&inode->i_mutex); | ||
155 | return file->f_pos; | ||
156 | } | ||
157 | offset += file->f_pos; | ||
158 | break; | ||
159 | } | ||
160 | |||
161 | if (offset < 0 || offset > maxbytes) { | ||
162 | mutex_unlock(&inode->i_mutex); | ||
163 | return -EINVAL; | ||
164 | } | ||
165 | |||
166 | if (offset != file->f_pos) { | ||
167 | file->f_pos = offset; | ||
168 | file->f_version = 0; | ||
169 | } | ||
170 | mutex_unlock(&inode->i_mutex); | ||
171 | |||
172 | return offset; | ||
173 | } | ||
174 | |||
133 | const struct file_operations ext4_file_operations = { | 175 | const struct file_operations ext4_file_operations = { |
134 | .llseek = generic_file_llseek, | 176 | .llseek = ext4_llseek, |
135 | .read = do_sync_read, | 177 | .read = do_sync_read, |
136 | .write = do_sync_write, | 178 | .write = do_sync_write, |
137 | .aio_read = generic_file_aio_read, | 179 | .aio_read = generic_file_aio_read, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 3f3ff5ee8f9d..c1a7bc923cf6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -34,6 +34,89 @@ | |||
34 | 34 | ||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | static void dump_completed_IO(struct inode * inode) | ||
38 | { | ||
39 | #ifdef EXT4_DEBUG | ||
40 | struct list_head *cur, *before, *after; | ||
41 | ext4_io_end_t *io, *io0, *io1; | ||
42 | unsigned long flags; | ||
43 | |||
44 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
45 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
50 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
51 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
52 | cur = &io->list; | ||
53 | before = cur->prev; | ||
54 | io0 = container_of(before, ext4_io_end_t, list); | ||
55 | after = cur->next; | ||
56 | io1 = container_of(after, ext4_io_end_t, list); | ||
57 | |||
58 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
59 | io, inode->i_ino, io0, io1); | ||
60 | } | ||
61 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This function is called from ext4_sync_file(). | ||
67 | * | ||
68 | * When IO is completed, the work to convert unwritten extents to | ||
69 | * written is queued on workqueue but may not get immediately | ||
70 | * scheduled. When fsync is called, we need to ensure the | ||
71 | * conversion is complete before fsync returns. | ||
72 | * The inode keeps track of a list of pending/completed IO that | ||
73 | * might need to do the conversion. This function walks through | ||
74 | * the list and converts the related unwritten extents for completed IO | ||
75 | * to written. | ||
76 | * The function returns 0 on success, or a negative error code if a conversion fails. | ||
77 | */ | ||
78 | static int flush_completed_IO(struct inode *inode) | ||
79 | { | ||
80 | ext4_io_end_t *io; | ||
81 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
82 | unsigned long flags; | ||
83 | int ret = 0; | ||
84 | int ret2 = 0; | ||
85 | |||
86 | if (list_empty(&ei->i_completed_io_list)) | ||
87 | return ret; | ||
88 | |||
89 | dump_completed_IO(inode); | ||
90 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
91 | while (!list_empty(&ei->i_completed_io_list)){ | ||
92 | io = list_entry(ei->i_completed_io_list.next, | ||
93 | ext4_io_end_t, list); | ||
94 | /* | ||
95 | * Calling ext4_end_io_nolock() to convert completed | ||
96 | * IO to written. | ||
97 | * | ||
98 | * When ext4_sync_file() is called, run_queue() may already | ||
99 | * be about to flush the work corresponding to this io structure. | ||
100 | * It will be upset if it finds that the io structure related | ||
101 | * to the work to be scheduled has been freed. | ||
102 | * | ||
103 | * Thus we need to keep the io structure valid here after the | ||
104 | * conversion has finished. The io structure has a flag to | ||
105 | * avoid double conversion by both fsync and the background | ||
106 | * workqueue work. | ||
107 | */ | ||
108 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
109 | ret = ext4_end_io_nolock(io); | ||
110 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
111 | if (ret < 0) | ||
112 | ret2 = ret; | ||
113 | else | ||
114 | list_del_init(&io->list); | ||
115 | } | ||
116 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
117 | return (ret2 < 0) ? ret2 : 0; | ||
118 | } | ||
119 | |||
37 | /* | 120 | /* |
38 | * If we're not journaling and this is a just-created file, we have to | 121 | * If we're not journaling and this is a just-created file, we have to |
39 | * sync our parent directory (if it was freshly created) since | 122 | * sync our parent directory (if it was freshly created) since |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 45853e0d1f21..1ce240a23ebb 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -50,7 +50,7 @@ | |||
50 | * need to use it within a single byte (to ensure we get endianness right). | 50 | * need to use it within a single byte (to ensure we get endianness right). |
51 | * We can use memset for the rest of the bitmap as there are no other users. | 51 | * We can use memset for the rest of the bitmap as there are no other users. |
52 | */ | 52 | */ |
53 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | 53 | void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) |
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | 56 | ||
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
65 | } | 65 | } |
66 | 66 | ||
67 | /* Initializes an uninitialized inode bitmap */ | 67 | /* Initializes an uninitialized inode bitmap */ |
68 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | 68 | static unsigned ext4_init_inode_bitmap(struct super_block *sb, |
69 | ext4_group_t block_group, | 69 | struct buffer_head *bh, |
70 | struct ext4_group_desc *gdp) | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | ||
71 | { | 72 | { |
72 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 73 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
73 | 74 | ||
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
85 | } | 86 | } |
86 | 87 | ||
87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
89 | bh->b_data); | 90 | bh->b_data); |
90 | 91 | ||
91 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
107 | desc = ext4_get_group_desc(sb, block_group, NULL); | 108 | desc = ext4_get_group_desc(sb, block_group, NULL); |
108 | if (!desc) | 109 | if (!desc) |
109 | return NULL; | 110 | return NULL; |
111 | |||
110 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 112 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
111 | bh = sb_getblk(sb, bitmap_blk); | 113 | bh = sb_getblk(sb, bitmap_blk); |
112 | if (unlikely(!bh)) { | 114 | if (unlikely(!bh)) { |
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 125 | unlock_buffer(bh); |
124 | return bh; | 126 | return bh; |
125 | } | 127 | } |
128 | |||
126 | ext4_lock_group(sb, block_group); | 129 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 130 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 131 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
133 | return bh; | 136 | return bh; |
134 | } | 137 | } |
135 | ext4_unlock_group(sb, block_group); | 138 | ext4_unlock_group(sb, block_group); |
139 | |||
136 | if (buffer_uptodate(bh)) { | 140 | if (buffer_uptodate(bh)) { |
137 | /* | 141 | /* |
138 | * if not uninit if bh is uptodate, | 142 | * if not uninit if bh is uptodate, |
@@ -411,8 +415,8 @@ struct orlov_stats { | |||
411 | * for a particular block group or flex_bg. If flex_size is 1, then g | 415 | * for a particular block group or flex_bg. If flex_size is 1, then g |
412 | * is a block group number; otherwise it is flex_bg number. | 416 | * is a block group number; otherwise it is flex_bg number. |
413 | */ | 417 | */ |
414 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | 418 | static void get_orlov_stats(struct super_block *sb, ext4_group_t g, |
415 | int flex_size, struct orlov_stats *stats) | 419 | int flex_size, struct orlov_stats *stats) |
416 | { | 420 | { |
417 | struct ext4_group_desc *desc; | 421 | struct ext4_group_desc *desc; |
418 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; | 422 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; |
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb, | |||
712 | { | 716 | { |
713 | int free = 0, retval = 0, count; | 717 | int free = 0, retval = 0, count; |
714 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 718 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
719 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
715 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 720 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
716 | 721 | ||
722 | /* | ||
723 | * We have to be sure that new inode allocation does not race with | ||
724 | * inode table initialization, because otherwise we may end up | ||
725 | * allocating and writing new inode right before sb_issue_zeroout | ||
726 | * takes place and overwriting our new inode with zeroes. So we | ||
727 | * take alloc_sem to prevent it. | ||
728 | */ | ||
729 | down_read(&grp->alloc_sem); | ||
717 | ext4_lock_group(sb, group); | 730 | ext4_lock_group(sb, group); |
718 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 731 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
719 | /* not a free inode */ | 732 | /* not a free inode */ |
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
724 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 737 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
725 | ino > EXT4_INODES_PER_GROUP(sb)) { | 738 | ino > EXT4_INODES_PER_GROUP(sb)) { |
726 | ext4_unlock_group(sb, group); | 739 | ext4_unlock_group(sb, group); |
740 | up_read(&grp->alloc_sem); | ||
727 | ext4_error(sb, "reserved inode or inode > inodes count - " | 741 | ext4_error(sb, "reserved inode or inode > inodes count - " |
728 | "block_group = %u, inode=%lu", group, | 742 | "block_group = %u, inode=%lu", group, |
729 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 743 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
772 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
773 | err_ret: | 787 | err_ret: |
774 | ext4_unlock_group(sb, group); | 788 | ext4_unlock_group(sb, group); |
789 | up_read(&grp->alloc_sem); | ||
775 | return retval; | 790 | return retval; |
776 | } | 791 | } |
777 | 792 | ||
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1205 | } | 1220 | } |
1206 | return count; | 1221 | return count; |
1207 | } | 1222 | } |
1223 | |||
1224 | /* | ||
1225 | * Zeroes not yet zeroed inode table - just write zeroes through the whole | ||
1226 | * inode table. Must be called without any spinlock held. The only place | ||
1227 | * where it is called from on active part of filesystem is ext4lazyinit | ||
1228 | * thread, so we do not need any special locks, however we have to prevent | ||
1229 | * inode allocation from the current group, so we take alloc_sem lock, to | ||
1230 | * block ext4_claim_inode until we are finished. | ||
1231 | */ | ||
1232 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | ||
1233 | int barrier) | ||
1234 | { | ||
1235 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
1236 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1237 | struct ext4_group_desc *gdp = NULL; | ||
1238 | struct buffer_head *group_desc_bh; | ||
1239 | handle_t *handle; | ||
1240 | ext4_fsblk_t blk; | ||
1241 | int num, ret = 0, used_blks = 0; | ||
1242 | |||
1243 | /* This should not happen, but just to be sure check this */ | ||
1244 | if (sb->s_flags & MS_RDONLY) { | ||
1245 | ret = 1; | ||
1246 | goto out; | ||
1247 | } | ||
1248 | |||
1249 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | ||
1250 | if (!gdp) | ||
1251 | goto out; | ||
1252 | |||
1253 | /* | ||
1254 | * We do not need to lock this, because we are the only one | ||
1255 | * handling this flag. | ||
1256 | */ | ||
1257 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) | ||
1258 | goto out; | ||
1259 | |||
1260 | handle = ext4_journal_start_sb(sb, 1); | ||
1261 | if (IS_ERR(handle)) { | ||
1262 | ret = PTR_ERR(handle); | ||
1263 | goto out; | ||
1264 | } | ||
1265 | |||
1266 | down_write(&grp->alloc_sem); | ||
1267 | /* | ||
1268 | * If inode bitmap was already initialized there may be some | ||
1269 | * used inodes so we need to skip blocks with used inodes in | ||
1270 | * inode table. | ||
1271 | */ | ||
1272 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
1273 | used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - | ||
1274 | ext4_itable_unused_count(sb, gdp)), | ||
1275 | sbi->s_inodes_per_block); | ||
1276 | |||
1277 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | ||
1278 | ext4_error(sb, "Something is wrong with group %u\n" | ||
1279 | "Used itable blocks: %d" | ||
1280 | "itable unused count: %u\n", | ||
1281 | group, used_blks, | ||
1282 | ext4_itable_unused_count(sb, gdp)); | ||
1283 | ret = 1; | ||
1284 | goto out; | ||
1285 | } | ||
1286 | |||
1287 | blk = ext4_inode_table(sb, gdp) + used_blks; | ||
1288 | num = sbi->s_itb_per_group - used_blks; | ||
1289 | |||
1290 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
1291 | ret = ext4_journal_get_write_access(handle, | ||
1292 | group_desc_bh); | ||
1293 | if (ret) | ||
1294 | goto err_out; | ||
1295 | |||
1296 | /* | ||
1297 | * Skip zeroout if the inode table is full. But we set the ZEROED | ||
1298 | * flag anyway, because obviously, when it is full it does not need | ||
1299 | * further zeroing. | ||
1300 | */ | ||
1301 | if (unlikely(num == 0)) | ||
1302 | goto skip_zeroout; | ||
1303 | |||
1304 | ext4_debug("going to zero out inode table in group %d\n", | ||
1305 | group); | ||
1306 | ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); | ||
1307 | if (ret < 0) | ||
1308 | goto err_out; | ||
1309 | if (barrier) | ||
1310 | blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); | ||
1311 | |||
1312 | skip_zeroout: | ||
1313 | ext4_lock_group(sb, group); | ||
1314 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
1315 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1316 | ext4_unlock_group(sb, group); | ||
1317 | |||
1318 | BUFFER_TRACE(group_desc_bh, | ||
1319 | "call ext4_handle_dirty_metadata"); | ||
1320 | ret = ext4_handle_dirty_metadata(handle, NULL, | ||
1321 | group_desc_bh); | ||
1322 | |||
1323 | err_out: | ||
1324 | up_write(&grp->alloc_sem); | ||
1325 | ext4_journal_stop(handle); | ||
1326 | out: | ||
1327 | return ret; | ||
1328 | } | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 49635ef236f8..2d6c6c8c036d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
60 | } | 60 | } |
61 | 61 | ||
62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
63 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
64 | struct buffer_head *bh_result, int create); | ||
65 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
66 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
67 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | ||
68 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | ||
63 | 69 | ||
64 | /* | 70 | /* |
65 | * Test whether an inode is a fast symlink. | 71 | * Test whether an inode is a fast symlink. |
@@ -755,6 +761,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
755 | * parent to disk. | 761 | * parent to disk. |
756 | */ | 762 | */ |
757 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 763 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
764 | if (unlikely(!bh)) { | ||
765 | err = -EIO; | ||
766 | goto failed; | ||
767 | } | ||
768 | |||
758 | branch[n].bh = bh; | 769 | branch[n].bh = bh; |
759 | lock_buffer(bh); | 770 | lock_buffer(bh); |
760 | BUFFER_TRACE(bh, "call get_create_access"); | 771 | BUFFER_TRACE(bh, "call get_create_access"); |
@@ -1207,8 +1218,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1207 | break; | 1218 | break; |
1208 | idx++; | 1219 | idx++; |
1209 | num++; | 1220 | num++; |
1210 | if (num >= max_pages) | 1221 | if (num >= max_pages) { |
1222 | done = 1; | ||
1211 | break; | 1223 | break; |
1224 | } | ||
1212 | } | 1225 | } |
1213 | pagevec_release(&pvec); | 1226 | pagevec_release(&pvec); |
1214 | } | 1227 | } |
@@ -1995,16 +2008,23 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1995 | * | 2008 | * |
1996 | * As pages are already locked by write_cache_pages(), we can't use it | 2009 | * As pages are already locked by write_cache_pages(), we can't use it |
1997 | */ | 2010 | */ |
1998 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 2011 | static int mpage_da_submit_io(struct mpage_da_data *mpd, |
2012 | struct ext4_map_blocks *map) | ||
1999 | { | 2013 | { |
2000 | long pages_skipped; | ||
2001 | struct pagevec pvec; | 2014 | struct pagevec pvec; |
2002 | unsigned long index, end; | 2015 | unsigned long index, end; |
2003 | int ret = 0, err, nr_pages, i; | 2016 | int ret = 0, err, nr_pages, i; |
2004 | struct inode *inode = mpd->inode; | 2017 | struct inode *inode = mpd->inode; |
2005 | struct address_space *mapping = inode->i_mapping; | 2018 | struct address_space *mapping = inode->i_mapping; |
2019 | loff_t size = i_size_read(inode); | ||
2020 | unsigned int len, block_start; | ||
2021 | struct buffer_head *bh, *page_bufs = NULL; | ||
2022 | int journal_data = ext4_should_journal_data(inode); | ||
2023 | sector_t pblock = 0, cur_logical = 0; | ||
2024 | struct ext4_io_submit io_submit; | ||
2006 | 2025 | ||
2007 | BUG_ON(mpd->next_page <= mpd->first_page); | 2026 | BUG_ON(mpd->next_page <= mpd->first_page); |
2027 | memset(&io_submit, 0, sizeof(io_submit)); | ||
2008 | /* | 2028 | /* |
2009 | * We need to start from the first_page to the next_page - 1 | 2029 | * We need to start from the first_page to the next_page - 1 |
2010 | * to make sure we also write the mapped dirty buffer_heads. | 2030 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -2020,122 +2040,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2020 | if (nr_pages == 0) | 2040 | if (nr_pages == 0) |
2021 | break; | 2041 | break; |
2022 | for (i = 0; i < nr_pages; i++) { | 2042 | for (i = 0; i < nr_pages; i++) { |
2043 | int commit_write = 0, redirty_page = 0; | ||
2023 | struct page *page = pvec.pages[i]; | 2044 | struct page *page = pvec.pages[i]; |
2024 | 2045 | ||
2025 | index = page->index; | 2046 | index = page->index; |
2026 | if (index > end) | 2047 | if (index > end) |
2027 | break; | 2048 | break; |
2049 | |||
2050 | if (index == size >> PAGE_CACHE_SHIFT) | ||
2051 | len = size & ~PAGE_CACHE_MASK; | ||
2052 | else | ||
2053 | len = PAGE_CACHE_SIZE; | ||
2054 | if (map) { | ||
2055 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
2056 | inode->i_blkbits); | ||
2057 | pblock = map->m_pblk + (cur_logical - | ||
2058 | map->m_lblk); | ||
2059 | } | ||
2028 | index++; | 2060 | index++; |
2029 | 2061 | ||
2030 | BUG_ON(!PageLocked(page)); | 2062 | BUG_ON(!PageLocked(page)); |
2031 | BUG_ON(PageWriteback(page)); | 2063 | BUG_ON(PageWriteback(page)); |
2032 | 2064 | ||
2033 | pages_skipped = mpd->wbc->pages_skipped; | ||
2034 | err = mapping->a_ops->writepage(page, mpd->wbc); | ||
2035 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) | ||
2036 | /* | ||
2037 | * have successfully written the page | ||
2038 | * without skipping the same | ||
2039 | */ | ||
2040 | mpd->pages_written++; | ||
2041 | /* | 2065 | /* |
2042 | * In error case, we have to continue because | 2066 | * If the page does not have buffers (for |
2043 | * remaining pages are still locked | 2067 | * whatever reason), try to create them using |
2044 | * XXX: unlock and re-dirty them? | 2068 | * __block_write_begin. If this fails, |
2069 | * redirty the page and move on. | ||
2045 | */ | 2070 | */ |
2046 | if (ret == 0) | 2071 | if (!page_has_buffers(page)) { |
2047 | ret = err; | 2072 | if (__block_write_begin(page, 0, len, |
2048 | } | 2073 | noalloc_get_block_write)) { |
2049 | pagevec_release(&pvec); | 2074 | redirty_page: |
2050 | } | 2075 | redirty_page_for_writepage(mpd->wbc, |
2051 | return ret; | 2076 | page); |
2052 | } | 2077 | unlock_page(page); |
2053 | 2078 | continue; | |
2054 | /* | 2079 | } |
2055 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | 2080 | commit_write = 1; |
2056 | * | 2081 | } |
2057 | * the function goes through all passed space and put actual disk | ||
2058 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | ||
2059 | */ | ||
2060 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, | ||
2061 | struct ext4_map_blocks *map) | ||
2062 | { | ||
2063 | struct inode *inode = mpd->inode; | ||
2064 | struct address_space *mapping = inode->i_mapping; | ||
2065 | int blocks = map->m_len; | ||
2066 | sector_t pblock = map->m_pblk, cur_logical; | ||
2067 | struct buffer_head *head, *bh; | ||
2068 | pgoff_t index, end; | ||
2069 | struct pagevec pvec; | ||
2070 | int nr_pages, i; | ||
2071 | |||
2072 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2073 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2074 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2075 | |||
2076 | pagevec_init(&pvec, 0); | ||
2077 | |||
2078 | while (index <= end) { | ||
2079 | /* XXX: optimize tail */ | ||
2080 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
2081 | if (nr_pages == 0) | ||
2082 | break; | ||
2083 | for (i = 0; i < nr_pages; i++) { | ||
2084 | struct page *page = pvec.pages[i]; | ||
2085 | |||
2086 | index = page->index; | ||
2087 | if (index > end) | ||
2088 | break; | ||
2089 | index++; | ||
2090 | |||
2091 | BUG_ON(!PageLocked(page)); | ||
2092 | BUG_ON(PageWriteback(page)); | ||
2093 | BUG_ON(!page_has_buffers(page)); | ||
2094 | |||
2095 | bh = page_buffers(page); | ||
2096 | head = bh; | ||
2097 | |||
2098 | /* skip blocks out of the range */ | ||
2099 | do { | ||
2100 | if (cur_logical >= map->m_lblk) | ||
2101 | break; | ||
2102 | cur_logical++; | ||
2103 | } while ((bh = bh->b_this_page) != head); | ||
2104 | 2082 | ||
2083 | bh = page_bufs = page_buffers(page); | ||
2084 | block_start = 0; | ||
2105 | do { | 2085 | do { |
2106 | if (cur_logical >= map->m_lblk + blocks) | 2086 | if (!bh) |
2107 | break; | 2087 | goto redirty_page; |
2108 | 2088 | if (map && (cur_logical >= map->m_lblk) && | |
2109 | if (buffer_delay(bh) || buffer_unwritten(bh)) { | 2089 | (cur_logical <= (map->m_lblk + |
2110 | 2090 | (map->m_len - 1)))) { | |
2111 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | ||
2112 | |||
2113 | if (buffer_delay(bh)) { | 2091 | if (buffer_delay(bh)) { |
2114 | clear_buffer_delay(bh); | 2092 | clear_buffer_delay(bh); |
2115 | bh->b_blocknr = pblock; | 2093 | bh->b_blocknr = pblock; |
2116 | } else { | ||
2117 | /* | ||
2118 | * unwritten already should have | ||
2119 | * blocknr assigned. Verify that | ||
2120 | */ | ||
2121 | clear_buffer_unwritten(bh); | ||
2122 | BUG_ON(bh->b_blocknr != pblock); | ||
2123 | } | 2094 | } |
2095 | if (buffer_unwritten(bh) || | ||
2096 | buffer_mapped(bh)) | ||
2097 | BUG_ON(bh->b_blocknr != pblock); | ||
2098 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
2099 | set_buffer_uninit(bh); | ||
2100 | clear_buffer_unwritten(bh); | ||
2101 | } | ||
2124 | 2102 | ||
2125 | } else if (buffer_mapped(bh)) | 2103 | /* redirty page if block allocation undone */ |
2126 | BUG_ON(bh->b_blocknr != pblock); | 2104 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2127 | 2105 | redirty_page = 1; | |
2128 | if (map->m_flags & EXT4_MAP_UNINIT) | 2106 | bh = bh->b_this_page; |
2129 | set_buffer_uninit(bh); | 2107 | block_start += bh->b_size; |
2130 | cur_logical++; | 2108 | cur_logical++; |
2131 | pblock++; | 2109 | pblock++; |
2132 | } while ((bh = bh->b_this_page) != head); | 2110 | } while (bh != page_bufs); |
2111 | |||
2112 | if (redirty_page) | ||
2113 | goto redirty_page; | ||
2114 | |||
2115 | if (commit_write) | ||
2116 | /* mark the buffer_heads as dirty & uptodate */ | ||
2117 | block_commit_write(page, 0, len); | ||
2118 | |||
2119 | /* | ||
2120 | * Delalloc doesn't support data journalling, | ||
2121 | * but eventually maybe we'll lift this | ||
2122 | * restriction. | ||
2123 | */ | ||
2124 | if (unlikely(journal_data && PageChecked(page))) | ||
2125 | err = __ext4_journalled_writepage(page, len); | ||
2126 | else | ||
2127 | err = ext4_bio_write_page(&io_submit, page, | ||
2128 | len, mpd->wbc); | ||
2129 | |||
2130 | if (!err) | ||
2131 | mpd->pages_written++; | ||
2132 | /* | ||
2133 | * In error case, we have to continue because | ||
2134 | * remaining pages are still locked | ||
2135 | */ | ||
2136 | if (ret == 0) | ||
2137 | ret = err; | ||
2133 | } | 2138 | } |
2134 | pagevec_release(&pvec); | 2139 | pagevec_release(&pvec); |
2135 | } | 2140 | } |
2141 | ext4_io_submit(&io_submit); | ||
2142 | return ret; | ||
2136 | } | 2143 | } |
2137 | 2144 | ||
2138 | |||
2139 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2145 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
2140 | sector_t logical, long blk_cnt) | 2146 | sector_t logical, long blk_cnt) |
2141 | { | 2147 | { |
@@ -2187,35 +2193,32 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2187 | } | 2193 | } |
2188 | 2194 | ||
2189 | /* | 2195 | /* |
2190 | * mpage_da_map_blocks - go through given space | 2196 | * mpage_da_map_and_submit - go through given space, map them |
2197 | * if necessary, and then submit them for I/O | ||
2191 | * | 2198 | * |
2192 | * @mpd - bh describing space | 2199 | * @mpd - bh describing space |
2193 | * | 2200 | * |
2194 | * The function skips space we know is already mapped to disk blocks. | 2201 | * The function skips space we know is already mapped to disk blocks. |
2195 | * | 2202 | * |
2196 | */ | 2203 | */ |
2197 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2204 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) |
2198 | { | 2205 | { |
2199 | int err, blks, get_blocks_flags; | 2206 | int err, blks, get_blocks_flags; |
2200 | struct ext4_map_blocks map; | 2207 | struct ext4_map_blocks map, *mapp = NULL; |
2201 | sector_t next = mpd->b_blocknr; | 2208 | sector_t next = mpd->b_blocknr; |
2202 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2209 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2203 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2210 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
2204 | handle_t *handle = NULL; | 2211 | handle_t *handle = NULL; |
2205 | 2212 | ||
2206 | /* | 2213 | /* |
2207 | * We consider only non-mapped and non-allocated blocks | 2214 | * If the blocks are mapped already, or we couldn't accumulate |
2208 | */ | 2215 | * any blocks, then proceed immediately to the submission stage. |
2209 | if ((mpd->b_state & (1 << BH_Mapped)) && | ||
2210 | !(mpd->b_state & (1 << BH_Delay)) && | ||
2211 | !(mpd->b_state & (1 << BH_Unwritten))) | ||
2212 | return 0; | ||
2213 | |||
2214 | /* | ||
2215 | * If we didn't accumulate anything to write simply return | ||
2216 | */ | 2216 | */ |
2217 | if (!mpd->b_size) | 2217 | if ((mpd->b_size == 0) || |
2218 | return 0; | 2218 | ((mpd->b_state & (1 << BH_Mapped)) && |
2219 | !(mpd->b_state & (1 << BH_Delay)) && | ||
2220 | !(mpd->b_state & (1 << BH_Unwritten)))) | ||
2221 | goto submit_io; | ||
2219 | 2222 | ||
2220 | handle = ext4_journal_current_handle(); | 2223 | handle = ext4_journal_current_handle(); |
2221 | BUG_ON(!handle); | 2224 | BUG_ON(!handle); |
@@ -2252,17 +2255,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2252 | 2255 | ||
2253 | err = blks; | 2256 | err = blks; |
2254 | /* | 2257 | /* |
2255 | * If get block returns with error we simply | 2258 | * If get block returns EAGAIN or ENOSPC and there |
2256 | * return. Later writepage will redirty the page and | 2259 | * appears to be free blocks we will call |
2257 | * writepages will find the dirty page again | 2260 | * ext4_writepage() for all of the pages which will |
2261 | * just redirty the pages. | ||
2258 | */ | 2262 | */ |
2259 | if (err == -EAGAIN) | 2263 | if (err == -EAGAIN) |
2260 | return 0; | 2264 | goto submit_io; |
2261 | 2265 | ||
2262 | if (err == -ENOSPC && | 2266 | if (err == -ENOSPC && |
2263 | ext4_count_free_blocks(sb)) { | 2267 | ext4_count_free_blocks(sb)) { |
2264 | mpd->retval = err; | 2268 | mpd->retval = err; |
2265 | return 0; | 2269 | goto submit_io; |
2266 | } | 2270 | } |
2267 | 2271 | ||
2268 | /* | 2272 | /* |
@@ -2287,10 +2291,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2287 | /* invalidate all the pages */ | 2291 | /* invalidate all the pages */ |
2288 | ext4_da_block_invalidatepages(mpd, next, | 2292 | ext4_da_block_invalidatepages(mpd, next, |
2289 | mpd->b_size >> mpd->inode->i_blkbits); | 2293 | mpd->b_size >> mpd->inode->i_blkbits); |
2290 | return err; | 2294 | return; |
2291 | } | 2295 | } |
2292 | BUG_ON(blks == 0); | 2296 | BUG_ON(blks == 0); |
2293 | 2297 | ||
2298 | mapp = &map; | ||
2294 | if (map.m_flags & EXT4_MAP_NEW) { | 2299 | if (map.m_flags & EXT4_MAP_NEW) { |
2295 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | 2300 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; |
2296 | int i; | 2301 | int i; |
@@ -2299,18 +2304,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2299 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 2304 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2300 | } | 2305 | } |
2301 | 2306 | ||
2302 | /* | ||
2303 | * If blocks are delayed marked, we need to | ||
2304 | * put actual blocknr and drop delayed bit | ||
2305 | */ | ||
2306 | if ((mpd->b_state & (1 << BH_Delay)) || | ||
2307 | (mpd->b_state & (1 << BH_Unwritten))) | ||
2308 | mpage_put_bnr_to_bhs(mpd, &map); | ||
2309 | |||
2310 | if (ext4_should_order_data(mpd->inode)) { | 2307 | if (ext4_should_order_data(mpd->inode)) { |
2311 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2308 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
2312 | if (err) | 2309 | if (err) |
2313 | return err; | 2310 | /* This only happens if the journal is aborted */ |
2311 | return; | ||
2314 | } | 2312 | } |
2315 | 2313 | ||
2316 | /* | 2314 | /* |
@@ -2321,10 +2319,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2321 | disksize = i_size_read(mpd->inode); | 2319 | disksize = i_size_read(mpd->inode); |
2322 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | 2320 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { |
2323 | ext4_update_i_disksize(mpd->inode, disksize); | 2321 | ext4_update_i_disksize(mpd->inode, disksize); |
2324 | return ext4_mark_inode_dirty(handle, mpd->inode); | 2322 | err = ext4_mark_inode_dirty(handle, mpd->inode); |
2323 | if (err) | ||
2324 | ext4_error(mpd->inode->i_sb, | ||
2325 | "Failed to mark inode %lu dirty", | ||
2326 | mpd->inode->i_ino); | ||
2325 | } | 2327 | } |
2326 | 2328 | ||
2327 | return 0; | 2329 | submit_io: |
2330 | mpage_da_submit_io(mpd, mapp); | ||
2331 | mpd->io_done = 1; | ||
2328 | } | 2332 | } |
2329 | 2333 | ||
2330 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 2334 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -2401,9 +2405,7 @@ flush_it: | |||
2401 | * We couldn't merge the block to our extent, so we | 2405 | * We couldn't merge the block to our extent, so we |
2402 | * need to flush current extent and start new one | 2406 | * need to flush current extent and start new one |
2403 | */ | 2407 | */ |
2404 | if (mpage_da_map_blocks(mpd) == 0) | 2408 | mpage_da_map_and_submit(mpd); |
2405 | mpage_da_submit_io(mpd); | ||
2406 | mpd->io_done = 1; | ||
2407 | return; | 2409 | return; |
2408 | } | 2410 | } |
2409 | 2411 | ||
@@ -2422,9 +2424,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2422 | * The function finds extents of pages and scan them for all blocks. | 2424 | * The function finds extents of pages and scan them for all blocks. |
2423 | */ | 2425 | */ |
2424 | static int __mpage_da_writepage(struct page *page, | 2426 | static int __mpage_da_writepage(struct page *page, |
2425 | struct writeback_control *wbc, void *data) | 2427 | struct writeback_control *wbc, |
2428 | struct mpage_da_data *mpd) | ||
2426 | { | 2429 | { |
2427 | struct mpage_da_data *mpd = data; | ||
2428 | struct inode *inode = mpd->inode; | 2430 | struct inode *inode = mpd->inode; |
2429 | struct buffer_head *bh, *head; | 2431 | struct buffer_head *bh, *head; |
2430 | sector_t logical; | 2432 | sector_t logical; |
@@ -2435,15 +2437,13 @@ static int __mpage_da_writepage(struct page *page, | |||
2435 | if (mpd->next_page != page->index) { | 2437 | if (mpd->next_page != page->index) { |
2436 | /* | 2438 | /* |
2437 | * Nope, we can't. So, we map non-allocated blocks | 2439 | * Nope, we can't. So, we map non-allocated blocks |
2438 | * and start IO on them using writepage() | 2440 | * and start IO on them |
2439 | */ | 2441 | */ |
2440 | if (mpd->next_page != mpd->first_page) { | 2442 | if (mpd->next_page != mpd->first_page) { |
2441 | if (mpage_da_map_blocks(mpd) == 0) | 2443 | mpage_da_map_and_submit(mpd); |
2442 | mpage_da_submit_io(mpd); | ||
2443 | /* | 2444 | /* |
2444 | * skip rest of the page in the page_vec | 2445 | * skip rest of the page in the page_vec |
2445 | */ | 2446 | */ |
2446 | mpd->io_done = 1; | ||
2447 | redirty_page_for_writepage(wbc, page); | 2447 | redirty_page_for_writepage(wbc, page); |
2448 | unlock_page(page); | 2448 | unlock_page(page); |
2449 | return MPAGE_DA_EXTENT_TAIL; | 2449 | return MPAGE_DA_EXTENT_TAIL; |
@@ -2622,6 +2622,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2622 | int ret = 0; | 2622 | int ret = 0; |
2623 | int err; | 2623 | int err; |
2624 | 2624 | ||
2625 | ClearPageChecked(page); | ||
2625 | page_bufs = page_buffers(page); | 2626 | page_bufs = page_buffers(page); |
2626 | BUG_ON(!page_bufs); | 2627 | BUG_ON(!page_bufs); |
2627 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 2628 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
@@ -2699,7 +2700,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2699 | static int ext4_writepage(struct page *page, | 2700 | static int ext4_writepage(struct page *page, |
2700 | struct writeback_control *wbc) | 2701 | struct writeback_control *wbc) |
2701 | { | 2702 | { |
2702 | int ret = 0; | 2703 | int ret = 0, commit_write = 0; |
2703 | loff_t size; | 2704 | loff_t size; |
2704 | unsigned int len; | 2705 | unsigned int len; |
2705 | struct buffer_head *page_bufs = NULL; | 2706 | struct buffer_head *page_bufs = NULL; |
@@ -2712,71 +2713,46 @@ static int ext4_writepage(struct page *page, | |||
2712 | else | 2713 | else |
2713 | len = PAGE_CACHE_SIZE; | 2714 | len = PAGE_CACHE_SIZE; |
2714 | 2715 | ||
2715 | if (page_has_buffers(page)) { | 2716 | /* |
2716 | page_bufs = page_buffers(page); | 2717 | * If the page does not have buffers (for whatever reason), |
2717 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2718 | * try to create them using __block_write_begin. If this |
2718 | ext4_bh_delay_or_unwritten)) { | 2719 | * fails, redirty the page and move on. |
2719 | /* | 2720 | */ |
2720 | * We don't want to do block allocation | 2721 | if (!page_buffers(page)) { |
2721 | * So redirty the page and return | 2722 | if (__block_write_begin(page, 0, len, |
2722 | * We may reach here when we do a journal commit | 2723 | noalloc_get_block_write)) { |
2723 | * via journal_submit_inode_data_buffers. | 2724 | redirty_page: |
2724 | * If we don't have mapping block we just ignore | ||
2725 | * them. We can also reach here via shrink_page_list | ||
2726 | */ | ||
2727 | redirty_page_for_writepage(wbc, page); | 2725 | redirty_page_for_writepage(wbc, page); |
2728 | unlock_page(page); | 2726 | unlock_page(page); |
2729 | return 0; | 2727 | return 0; |
2730 | } | 2728 | } |
2731 | } else { | 2729 | commit_write = 1; |
2730 | } | ||
2731 | page_bufs = page_buffers(page); | ||
2732 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2733 | ext4_bh_delay_or_unwritten)) { | ||
2732 | /* | 2734 | /* |
2733 | * The test for page_has_buffers() is subtle: | 2735 | * We don't want to do block allocation So redirty the |
2734 | * We know the page is dirty but it lost buffers. That means | 2736 | * page and return We may reach here when we do a |
2735 | * that at some moment in time after write_begin()/write_end() | 2737 | * journal commit via |
2736 | * has been called all buffers have been clean and thus they | 2738 | * journal_submit_inode_data_buffers. If we don't |
2737 | * must have been written at least once. So they are all | 2739 | * have mapping block we just ignore them. We can also |
2738 | * mapped and we can happily proceed with mapping them | 2740 | * reach here via shrink_page_list |
2739 | * and writing the page. | ||
2740 | * | ||
2741 | * Try to initialize the buffer_heads and check whether | ||
2742 | * all are mapped and non delay. We don't want to | ||
2743 | * do block allocation here. | ||
2744 | */ | 2741 | */ |
2745 | ret = __block_write_begin(page, 0, len, | 2742 | goto redirty_page; |
2746 | noalloc_get_block_write); | 2743 | } |
2747 | if (!ret) { | 2744 | if (commit_write) |
2748 | page_bufs = page_buffers(page); | ||
2749 | /* check whether all are mapped and non delay */ | ||
2750 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2751 | ext4_bh_delay_or_unwritten)) { | ||
2752 | redirty_page_for_writepage(wbc, page); | ||
2753 | unlock_page(page); | ||
2754 | return 0; | ||
2755 | } | ||
2756 | } else { | ||
2757 | /* | ||
2758 | * We can't do block allocation here | ||
2759 | * so just redity the page and unlock | ||
2760 | * and return | ||
2761 | */ | ||
2762 | redirty_page_for_writepage(wbc, page); | ||
2763 | unlock_page(page); | ||
2764 | return 0; | ||
2765 | } | ||
2766 | /* now mark the buffer_heads as dirty and uptodate */ | 2745 | /* now mark the buffer_heads as dirty and uptodate */ |
2767 | block_commit_write(page, 0, len); | 2746 | block_commit_write(page, 0, len); |
2768 | } | ||
2769 | 2747 | ||
2770 | if (PageChecked(page) && ext4_should_journal_data(inode)) { | 2748 | if (PageChecked(page) && ext4_should_journal_data(inode)) |
2771 | /* | 2749 | /* |
2772 | * It's mmapped pagecache. Add buffers and journal it. There | 2750 | * It's mmapped pagecache. Add buffers and journal it. There |
2773 | * doesn't seem much point in redirtying the page here. | 2751 | * doesn't seem much point in redirtying the page here. |
2774 | */ | 2752 | */ |
2775 | ClearPageChecked(page); | ||
2776 | return __ext4_journalled_writepage(page, len); | 2753 | return __ext4_journalled_writepage(page, len); |
2777 | } | ||
2778 | 2754 | ||
2779 | if (page_bufs && buffer_uninit(page_bufs)) { | 2755 | if (buffer_uninit(page_bufs)) { |
2780 | ext4_set_bh_endio(page_bufs, inode); | 2756 | ext4_set_bh_endio(page_bufs, inode); |
2781 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2757 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2782 | wbc, ext4_end_io_buffer_write); | 2758 | wbc, ext4_end_io_buffer_write); |
@@ -2823,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2823 | */ | 2799 | */ |
2824 | static int write_cache_pages_da(struct address_space *mapping, | 2800 | static int write_cache_pages_da(struct address_space *mapping, |
2825 | struct writeback_control *wbc, | 2801 | struct writeback_control *wbc, |
2826 | struct mpage_da_data *mpd) | 2802 | struct mpage_da_data *mpd, |
2803 | pgoff_t *done_index) | ||
2827 | { | 2804 | { |
2828 | int ret = 0; | 2805 | int ret = 0; |
2829 | int done = 0; | 2806 | int done = 0; |
2830 | struct pagevec pvec; | 2807 | struct pagevec pvec; |
2831 | int nr_pages; | 2808 | unsigned nr_pages; |
2832 | pgoff_t index; | 2809 | pgoff_t index; |
2833 | pgoff_t end; /* Inclusive */ | 2810 | pgoff_t end; /* Inclusive */ |
2834 | long nr_to_write = wbc->nr_to_write; | 2811 | long nr_to_write = wbc->nr_to_write; |
2812 | int tag; | ||
2835 | 2813 | ||
2836 | pagevec_init(&pvec, 0); | 2814 | pagevec_init(&pvec, 0); |
2837 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2815 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2838 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2816 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2839 | 2817 | ||
2818 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2819 | tag = PAGECACHE_TAG_TOWRITE; | ||
2820 | else | ||
2821 | tag = PAGECACHE_TAG_DIRTY; | ||
2822 | |||
2823 | *done_index = index; | ||
2840 | while (!done && (index <= end)) { | 2824 | while (!done && (index <= end)) { |
2841 | int i; | 2825 | int i; |
2842 | 2826 | ||
2843 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2827 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2844 | PAGECACHE_TAG_DIRTY, | ||
2845 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2828 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2846 | if (nr_pages == 0) | 2829 | if (nr_pages == 0) |
2847 | break; | 2830 | break; |
@@ -2861,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2861 | break; | 2844 | break; |
2862 | } | 2845 | } |
2863 | 2846 | ||
2847 | *done_index = page->index + 1; | ||
2848 | |||
2864 | lock_page(page); | 2849 | lock_page(page); |
2865 | 2850 | ||
2866 | /* | 2851 | /* |
@@ -2946,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2946 | long desired_nr_to_write, nr_to_writebump = 0; | 2931 | long desired_nr_to_write, nr_to_writebump = 0; |
2947 | loff_t range_start = wbc->range_start; | 2932 | loff_t range_start = wbc->range_start; |
2948 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2933 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2934 | pgoff_t done_index = 0; | ||
2935 | pgoff_t end; | ||
2949 | 2936 | ||
2950 | trace_ext4_da_writepages(inode, wbc); | 2937 | trace_ext4_da_writepages(inode, wbc); |
2951 | 2938 | ||
@@ -2981,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2981 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2968 | wbc->range_start = index << PAGE_CACHE_SHIFT; |
2982 | wbc->range_end = LLONG_MAX; | 2969 | wbc->range_end = LLONG_MAX; |
2983 | wbc->range_cyclic = 0; | 2970 | wbc->range_cyclic = 0; |
2984 | } else | 2971 | end = -1; |
2972 | } else { | ||
2985 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2973 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2974 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2975 | } | ||
2986 | 2976 | ||
2987 | /* | 2977 | /* |
2988 | * This works around two forms of stupidity. The first is in | 2978 | * This works around two forms of stupidity. The first is in |
@@ -3001,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3001 | * sbi->max_writeback_mb_bump whichever is smaller. | 2991 | * sbi->max_writeback_mb_bump whichever is smaller. |
3002 | */ | 2992 | */ |
3003 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | 2993 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); |
3004 | if (!range_cyclic && range_whole) | 2994 | if (!range_cyclic && range_whole) { |
3005 | desired_nr_to_write = wbc->nr_to_write * 8; | 2995 | if (wbc->nr_to_write == LONG_MAX) |
3006 | else | 2996 | desired_nr_to_write = wbc->nr_to_write; |
2997 | else | ||
2998 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2999 | } else | ||
3007 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | 3000 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, |
3008 | max_pages); | 3001 | max_pages); |
3009 | if (desired_nr_to_write > max_pages) | 3002 | if (desired_nr_to_write > max_pages) |
@@ -3020,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3020 | pages_skipped = wbc->pages_skipped; | 3013 | pages_skipped = wbc->pages_skipped; |
3021 | 3014 | ||
3022 | retry: | 3015 | retry: |
3016 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
3017 | tag_pages_for_writeback(mapping, index, end); | ||
3018 | |||
3023 | while (!ret && wbc->nr_to_write > 0) { | 3019 | while (!ret && wbc->nr_to_write > 0) { |
3024 | 3020 | ||
3025 | /* | 3021 | /* |
@@ -3058,16 +3054,14 @@ retry: | |||
3058 | mpd.io_done = 0; | 3054 | mpd.io_done = 0; |
3059 | mpd.pages_written = 0; | 3055 | mpd.pages_written = 0; |
3060 | mpd.retval = 0; | 3056 | mpd.retval = 0; |
3061 | ret = write_cache_pages_da(mapping, wbc, &mpd); | 3057 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3062 | /* | 3058 | /* |
3063 | * If we have a contiguous extent of pages and we | 3059 | * If we have a contiguous extent of pages and we |
3064 | * haven't done the I/O yet, map the blocks and submit | 3060 | * haven't done the I/O yet, map the blocks and submit |
3065 | * them for I/O. | 3061 | * them for I/O. |
3066 | */ | 3062 | */ |
3067 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 3063 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
3068 | if (mpage_da_map_blocks(&mpd) == 0) | 3064 | mpage_da_map_and_submit(&mpd); |
3069 | mpage_da_submit_io(&mpd); | ||
3070 | mpd.io_done = 1; | ||
3071 | ret = MPAGE_DA_EXTENT_TAIL; | 3065 | ret = MPAGE_DA_EXTENT_TAIL; |
3072 | } | 3066 | } |
3073 | trace_ext4_da_write_pages(inode, &mpd); | 3067 | trace_ext4_da_write_pages(inode, &mpd); |
@@ -3114,14 +3108,13 @@ retry: | |||
3114 | __func__, wbc->nr_to_write, ret); | 3108 | __func__, wbc->nr_to_write, ret); |
3115 | 3109 | ||
3116 | /* Update index */ | 3110 | /* Update index */ |
3117 | index += pages_written; | ||
3118 | wbc->range_cyclic = range_cyclic; | 3111 | wbc->range_cyclic = range_cyclic; |
3119 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 3112 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
3120 | /* | 3113 | /* |
3121 | * set the writeback_index so that range_cyclic | 3114 | * set the writeback_index so that range_cyclic |
3122 | * mode will write it back later | 3115 | * mode will write it back later |
3123 | */ | 3116 | */ |
3124 | mapping->writeback_index = index; | 3117 | mapping->writeback_index = done_index; |
3125 | 3118 | ||
3126 | out_writepages: | 3119 | out_writepages: |
3127 | wbc->nr_to_write -= nr_to_writebump; | 3120 | wbc->nr_to_write -= nr_to_writebump; |
@@ -3456,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3456 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3449 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3457 | } | 3450 | } |
3458 | 3451 | ||
3459 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3460 | { | ||
3461 | BUG_ON(!io); | ||
3462 | if (io->page) | ||
3463 | put_page(io->page); | ||
3464 | iput(io->inode); | ||
3465 | kfree(io); | ||
3466 | } | ||
3467 | |||
3468 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3452 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3469 | { | 3453 | { |
3470 | struct buffer_head *head, *bh; | 3454 | struct buffer_head *head, *bh; |
@@ -3641,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
3641 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 3625 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3642 | } | 3626 | } |
3643 | 3627 | ||
3644 | static void dump_completed_IO(struct inode * inode) | ||
3645 | { | ||
3646 | #ifdef EXT4_DEBUG | ||
3647 | struct list_head *cur, *before, *after; | ||
3648 | ext4_io_end_t *io, *io0, *io1; | ||
3649 | unsigned long flags; | ||
3650 | |||
3651 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
3652 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
3653 | return; | ||
3654 | } | ||
3655 | |||
3656 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
3657 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3658 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3659 | cur = &io->list; | ||
3660 | before = cur->prev; | ||
3661 | io0 = container_of(before, ext4_io_end_t, list); | ||
3662 | after = cur->next; | ||
3663 | io1 = container_of(after, ext4_io_end_t, list); | ||
3664 | |||
3665 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
3666 | io, inode->i_ino, io0, io1); | ||
3667 | } | ||
3668 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3669 | #endif | ||
3670 | } | ||
3671 | |||
3672 | /* | ||
3673 | * check a range of space and convert unwritten extents to written. | ||
3674 | */ | ||
3675 | static int ext4_end_io_nolock(ext4_io_end_t *io) | ||
3676 | { | ||
3677 | struct inode *inode = io->inode; | ||
3678 | loff_t offset = io->offset; | ||
3679 | ssize_t size = io->size; | ||
3680 | int ret = 0; | ||
3681 | |||
3682 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
3683 | "list->prev 0x%p\n", | ||
3684 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3685 | |||
3686 | if (list_empty(&io->list)) | ||
3687 | return ret; | ||
3688 | |||
3689 | if (io->flag != EXT4_IO_UNWRITTEN) | ||
3690 | return ret; | ||
3691 | |||
3692 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3693 | if (ret < 0) { | ||
3694 | printk(KERN_EMERG "%s: failed to convert unwritten" | ||
3695 | "extents to written extents, error is %d" | ||
3696 | " io is still on inode %lu aio dio list\n", | ||
3697 | __func__, ret, inode->i_ino); | ||
3698 | return ret; | ||
3699 | } | ||
3700 | |||
3701 | if (io->iocb) | ||
3702 | aio_complete(io->iocb, io->result, 0); | ||
3703 | /* clear the DIO AIO unwritten flag */ | ||
3704 | io->flag = 0; | ||
3705 | return ret; | ||
3706 | } | ||
3707 | |||
3708 | /* | ||
3709 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
3710 | */ | ||
3711 | static void ext4_end_io_work(struct work_struct *work) | ||
3712 | { | ||
3713 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3714 | struct inode *inode = io->inode; | ||
3715 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3716 | unsigned long flags; | ||
3717 | int ret; | ||
3718 | |||
3719 | mutex_lock(&inode->i_mutex); | ||
3720 | ret = ext4_end_io_nolock(io); | ||
3721 | if (ret < 0) { | ||
3722 | mutex_unlock(&inode->i_mutex); | ||
3723 | return; | ||
3724 | } | ||
3725 | |||
3726 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3727 | if (!list_empty(&io->list)) | ||
3728 | list_del_init(&io->list); | ||
3729 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3730 | mutex_unlock(&inode->i_mutex); | ||
3731 | ext4_free_io_end(io); | ||
3732 | } | ||
3733 | |||
3734 | /* | ||
3735 | * This function is called from ext4_sync_file(). | ||
3736 | * | ||
3737 | * When IO is completed, the work to convert unwritten extents to | ||
3738 | * written is queued on workqueue but may not get immediately | ||
3739 | * scheduled. When fsync is called, we need to ensure the | ||
3740 | * conversion is complete before fsync returns. | ||
3741 | * The inode keeps track of a list of pending/completed IO that | ||
3742 | * might needs to do the conversion. This function walks through | ||
3743 | * the list and convert the related unwritten extents for completed IO | ||
3744 | * to written. | ||
3745 | * The function return the number of pending IOs on success. | ||
3746 | */ | ||
3747 | int flush_completed_IO(struct inode *inode) | ||
3748 | { | ||
3749 | ext4_io_end_t *io; | ||
3750 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3751 | unsigned long flags; | ||
3752 | int ret = 0; | ||
3753 | int ret2 = 0; | ||
3754 | |||
3755 | if (list_empty(&ei->i_completed_io_list)) | ||
3756 | return ret; | ||
3757 | |||
3758 | dump_completed_IO(inode); | ||
3759 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3760 | while (!list_empty(&ei->i_completed_io_list)){ | ||
3761 | io = list_entry(ei->i_completed_io_list.next, | ||
3762 | ext4_io_end_t, list); | ||
3763 | /* | ||
3764 | * Calling ext4_end_io_nolock() to convert completed | ||
3765 | * IO to written. | ||
3766 | * | ||
3767 | * When ext4_sync_file() is called, run_queue() may already | ||
3768 | * about to flush the work corresponding to this io structure. | ||
3769 | * It will be upset if it founds the io structure related | ||
3770 | * to the work-to-be schedule is freed. | ||
3771 | * | ||
3772 | * Thus we need to keep the io structure still valid here after | ||
3773 | * convertion finished. The io structure has a flag to | ||
3774 | * avoid double converting from both fsync and background work | ||
3775 | * queue work. | ||
3776 | */ | ||
3777 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3778 | ret = ext4_end_io_nolock(io); | ||
3779 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3780 | if (ret < 0) | ||
3781 | ret2 = ret; | ||
3782 | else | ||
3783 | list_del_init(&io->list); | ||
3784 | } | ||
3785 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3786 | return (ret2 < 0) ? ret2 : 0; | ||
3787 | } | ||
3788 | |||
3789 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | ||
3790 | { | ||
3791 | ext4_io_end_t *io = NULL; | ||
3792 | |||
3793 | io = kmalloc(sizeof(*io), flags); | ||
3794 | |||
3795 | if (io) { | ||
3796 | igrab(inode); | ||
3797 | io->inode = inode; | ||
3798 | io->flag = 0; | ||
3799 | io->offset = 0; | ||
3800 | io->size = 0; | ||
3801 | io->page = NULL; | ||
3802 | io->iocb = NULL; | ||
3803 | io->result = 0; | ||
3804 | INIT_WORK(&io->work, ext4_end_io_work); | ||
3805 | INIT_LIST_HEAD(&io->list); | ||
3806 | } | ||
3807 | |||
3808 | return io; | ||
3809 | } | ||
3810 | |||
3811 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3628 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3812 | ssize_t size, void *private, int ret, | 3629 | ssize_t size, void *private, int ret, |
3813 | bool is_async) | 3630 | bool is_async) |
@@ -3827,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3827 | size); | 3644 | size); |
3828 | 3645 | ||
3829 | /* if not aio dio with unwritten extents, just free io and return */ | 3646 | /* if not aio dio with unwritten extents, just free io and return */ |
3830 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3647 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
3831 | ext4_free_io_end(io_end); | 3648 | ext4_free_io_end(io_end); |
3832 | iocb->private = NULL; | 3649 | iocb->private = NULL; |
3833 | out: | 3650 | out: |
@@ -3844,14 +3661,14 @@ out: | |||
3844 | } | 3661 | } |
3845 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3662 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3846 | 3663 | ||
3847 | /* queue the work to convert unwritten extents to written */ | ||
3848 | queue_work(wq, &io_end->work); | ||
3849 | |||
3850 | /* Add the io_end to per-inode completed aio dio list*/ | 3664 | /* Add the io_end to per-inode completed aio dio list*/ |
3851 | ei = EXT4_I(io_end->inode); | 3665 | ei = EXT4_I(io_end->inode); |
3852 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 3666 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3853 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 3667 | list_add_tail(&io_end->list, &ei->i_completed_io_list); |
3854 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 3668 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3669 | |||
3670 | /* queue the work to convert unwritten extents to written */ | ||
3671 | queue_work(wq, &io_end->work); | ||
3855 | iocb->private = NULL; | 3672 | iocb->private = NULL; |
3856 | } | 3673 | } |
3857 | 3674 | ||
@@ -3872,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3872 | goto out; | 3689 | goto out; |
3873 | } | 3690 | } |
3874 | 3691 | ||
3875 | io_end->flag = EXT4_IO_UNWRITTEN; | 3692 | io_end->flag = EXT4_IO_END_UNWRITTEN; |
3876 | inode = io_end->inode; | 3693 | inode = io_end->inode; |
3877 | 3694 | ||
3878 | /* Add the io_end to per-inode completed io list*/ | 3695 | /* Add the io_end to per-inode completed io list*/ |
@@ -5463,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5463 | { | 5280 | { |
5464 | struct inode *inode = dentry->d_inode; | 5281 | struct inode *inode = dentry->d_inode; |
5465 | int error, rc = 0; | 5282 | int error, rc = 0; |
5283 | int orphan = 0; | ||
5466 | const unsigned int ia_valid = attr->ia_valid; | 5284 | const unsigned int ia_valid = attr->ia_valid; |
5467 | 5285 | ||
5468 | error = inode_change_ok(inode, attr); | 5286 | error = inode_change_ok(inode, attr); |
@@ -5518,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5518 | error = PTR_ERR(handle); | 5336 | error = PTR_ERR(handle); |
5519 | goto err_out; | 5337 | goto err_out; |
5520 | } | 5338 | } |
5521 | 5339 | if (ext4_handle_valid(handle)) { | |
5522 | error = ext4_orphan_add(handle, inode); | 5340 | error = ext4_orphan_add(handle, inode); |
5341 | orphan = 1; | ||
5342 | } | ||
5523 | EXT4_I(inode)->i_disksize = attr->ia_size; | 5343 | EXT4_I(inode)->i_disksize = attr->ia_size; |
5524 | rc = ext4_mark_inode_dirty(handle, inode); | 5344 | rc = ext4_mark_inode_dirty(handle, inode); |
5525 | if (!error) | 5345 | if (!error) |
@@ -5537,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5537 | goto err_out; | 5357 | goto err_out; |
5538 | } | 5358 | } |
5539 | ext4_orphan_del(handle, inode); | 5359 | ext4_orphan_del(handle, inode); |
5360 | orphan = 0; | ||
5540 | ext4_journal_stop(handle); | 5361 | ext4_journal_stop(handle); |
5541 | goto err_out; | 5362 | goto err_out; |
5542 | } | 5363 | } |
@@ -5559,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5559 | * If the call to ext4_truncate failed to get a transaction handle at | 5380 | * If the call to ext4_truncate failed to get a transaction handle at |
5560 | * all, we need to clean up the in-core orphan list manually. | 5381 | * all, we need to clean up the in-core orphan list manually. |
5561 | */ | 5382 | */ |
5562 | if (inode->i_nlink) | 5383 | if (orphan && inode->i_nlink) |
5563 | ext4_orphan_del(NULL, inode); | 5384 | ext4_orphan_del(NULL, inode); |
5564 | 5385 | ||
5565 | if (!rc && (ia_valid & ATTR_MODE)) | 5386 | if (!rc && (ia_valid & ATTR_MODE)) |
@@ -5642,7 +5463,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5642 | * | 5463 | * |
5643 | * Also account for superblock, inode, quota and xattr blocks | 5464 | * Also account for superblock, inode, quota and xattr blocks |
5644 | */ | 5465 | */ |
5645 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5466 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5646 | { | 5467 | { |
5647 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 5468 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5648 | int gdpblocks; | 5469 | int gdpblocks; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 42f77b1dc72d..c58eba34724a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -338,6 +338,14 @@ | |||
338 | static struct kmem_cache *ext4_pspace_cachep; | 338 | static struct kmem_cache *ext4_pspace_cachep; |
339 | static struct kmem_cache *ext4_ac_cachep; | 339 | static struct kmem_cache *ext4_ac_cachep; |
340 | static struct kmem_cache *ext4_free_ext_cachep; | 340 | static struct kmem_cache *ext4_free_ext_cachep; |
341 | |||
342 | /* We create slab caches for groupinfo data structures based on the | ||
343 | * superblock block size. There will be one per mounted filesystem for | ||
344 | * each unique s_blocksize_bits */ | ||
345 | #define NR_GRPINFO_CACHES \ | ||
346 | (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1) | ||
347 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; | ||
348 | |||
341 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 349 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
342 | ext4_group_t group); | 350 | ext4_group_t group); |
343 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 351 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
@@ -939,6 +947,85 @@ out: | |||
939 | } | 947 | } |
940 | 948 | ||
941 | /* | 949 | /* |
950 | * lock the group_info alloc_sem of all the groups | ||
951 | * belonging to the same buddy cache page. This | ||
952 | * make sure other parallel operation on the buddy | ||
953 | * cache doesn't happen whild holding the buddy cache | ||
954 | * lock | ||
955 | */ | ||
956 | static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | ||
957 | ext4_group_t group) | ||
958 | { | ||
959 | int i; | ||
960 | int block, pnum; | ||
961 | int blocks_per_page; | ||
962 | int groups_per_page; | ||
963 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
964 | ext4_group_t first_group; | ||
965 | struct ext4_group_info *grp; | ||
966 | |||
967 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
968 | /* | ||
969 | * the buddy cache inode stores the block bitmap | ||
970 | * and buddy information in consecutive blocks. | ||
971 | * So for each group we need two blocks. | ||
972 | */ | ||
973 | block = group * 2; | ||
974 | pnum = block / blocks_per_page; | ||
975 | first_group = pnum * blocks_per_page / 2; | ||
976 | |||
977 | groups_per_page = blocks_per_page >> 1; | ||
978 | if (groups_per_page == 0) | ||
979 | groups_per_page = 1; | ||
980 | /* read all groups the page covers into the cache */ | ||
981 | for (i = 0; i < groups_per_page; i++) { | ||
982 | |||
983 | if ((first_group + i) >= ngroups) | ||
984 | break; | ||
985 | grp = ext4_get_group_info(sb, first_group + i); | ||
986 | /* take all groups write allocation | ||
987 | * semaphore. This make sure there is | ||
988 | * no block allocation going on in any | ||
989 | * of that groups | ||
990 | */ | ||
991 | down_write_nested(&grp->alloc_sem, i); | ||
992 | } | ||
993 | return i; | ||
994 | } | ||
995 | |||
996 | static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
997 | ext4_group_t group, int locked_group) | ||
998 | { | ||
999 | int i; | ||
1000 | int block, pnum; | ||
1001 | int blocks_per_page; | ||
1002 | ext4_group_t first_group; | ||
1003 | struct ext4_group_info *grp; | ||
1004 | |||
1005 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1006 | /* | ||
1007 | * the buddy cache inode stores the block bitmap | ||
1008 | * and buddy information in consecutive blocks. | ||
1009 | * So for each group we need two blocks. | ||
1010 | */ | ||
1011 | block = group * 2; | ||
1012 | pnum = block / blocks_per_page; | ||
1013 | first_group = pnum * blocks_per_page / 2; | ||
1014 | /* release locks on all the groups */ | ||
1015 | for (i = 0; i < locked_group; i++) { | ||
1016 | |||
1017 | grp = ext4_get_group_info(sb, first_group + i); | ||
1018 | /* take all groups write allocation | ||
1019 | * semaphore. This make sure there is | ||
1020 | * no block allocation going on in any | ||
1021 | * of that groups | ||
1022 | */ | ||
1023 | up_write(&grp->alloc_sem); | ||
1024 | } | ||
1025 | |||
1026 | } | ||
1027 | |||
1028 | /* | ||
942 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | 1029 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the |
943 | * block group lock of all groups for this page; do not hold the BG lock when | 1030 | * block group lock of all groups for this page; do not hold the BG lock when |
944 | * calling this routine! | 1031 | * calling this routine! |
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1915 | return 0; | 2002 | return 0; |
1916 | } | 2003 | } |
1917 | 2004 | ||
1918 | /* | ||
1919 | * lock the group_info alloc_sem of all the groups | ||
1920 | * belonging to the same buddy cache page. This | ||
1921 | * make sure other parallel operation on the buddy | ||
1922 | * cache doesn't happen whild holding the buddy cache | ||
1923 | * lock | ||
1924 | */ | ||
1925 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1926 | { | ||
1927 | int i; | ||
1928 | int block, pnum; | ||
1929 | int blocks_per_page; | ||
1930 | int groups_per_page; | ||
1931 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1932 | ext4_group_t first_group; | ||
1933 | struct ext4_group_info *grp; | ||
1934 | |||
1935 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1936 | /* | ||
1937 | * the buddy cache inode stores the block bitmap | ||
1938 | * and buddy information in consecutive blocks. | ||
1939 | * So for each group we need two blocks. | ||
1940 | */ | ||
1941 | block = group * 2; | ||
1942 | pnum = block / blocks_per_page; | ||
1943 | first_group = pnum * blocks_per_page / 2; | ||
1944 | |||
1945 | groups_per_page = blocks_per_page >> 1; | ||
1946 | if (groups_per_page == 0) | ||
1947 | groups_per_page = 1; | ||
1948 | /* read all groups the page covers into the cache */ | ||
1949 | for (i = 0; i < groups_per_page; i++) { | ||
1950 | |||
1951 | if ((first_group + i) >= ngroups) | ||
1952 | break; | ||
1953 | grp = ext4_get_group_info(sb, first_group + i); | ||
1954 | /* take all groups write allocation | ||
1955 | * semaphore. This make sure there is | ||
1956 | * no block allocation going on in any | ||
1957 | * of that groups | ||
1958 | */ | ||
1959 | down_write_nested(&grp->alloc_sem, i); | ||
1960 | } | ||
1961 | return i; | ||
1962 | } | ||
1963 | |||
1964 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1965 | ext4_group_t group, int locked_group) | ||
1966 | { | ||
1967 | int i; | ||
1968 | int block, pnum; | ||
1969 | int blocks_per_page; | ||
1970 | ext4_group_t first_group; | ||
1971 | struct ext4_group_info *grp; | ||
1972 | |||
1973 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1974 | /* | ||
1975 | * the buddy cache inode stores the block bitmap | ||
1976 | * and buddy information in consecutive blocks. | ||
1977 | * So for each group we need two blocks. | ||
1978 | */ | ||
1979 | block = group * 2; | ||
1980 | pnum = block / blocks_per_page; | ||
1981 | first_group = pnum * blocks_per_page / 2; | ||
1982 | /* release locks on all the groups */ | ||
1983 | for (i = 0; i < locked_group; i++) { | ||
1984 | |||
1985 | grp = ext4_get_group_info(sb, first_group + i); | ||
1986 | /* take all groups write allocation | ||
1987 | * semaphore. This make sure there is | ||
1988 | * no block allocation going on in any | ||
1989 | * of that groups | ||
1990 | */ | ||
1991 | up_write(&grp->alloc_sem); | ||
1992 | } | ||
1993 | |||
1994 | } | ||
1995 | |||
1996 | static noinline_for_stack int | 2005 | static noinline_for_stack int |
1997 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 2006 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1998 | { | 2007 | { |
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = { | |||
2233 | .release = seq_release, | 2242 | .release = seq_release, |
2234 | }; | 2243 | }; |
2235 | 2244 | ||
2245 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | ||
2246 | { | ||
2247 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2248 | struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index]; | ||
2249 | |||
2250 | BUG_ON(!cachep); | ||
2251 | return cachep; | ||
2252 | } | ||
2236 | 2253 | ||
2237 | /* Create and initialize ext4_group_info data for the given group. */ | 2254 | /* Create and initialize ext4_group_info data for the given group. */ |
2238 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2255 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2239 | struct ext4_group_desc *desc) | 2256 | struct ext4_group_desc *desc) |
2240 | { | 2257 | { |
2241 | int i, len; | 2258 | int i; |
2242 | int metalen = 0; | 2259 | int metalen = 0; |
2243 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2260 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2244 | struct ext4_group_info **meta_group_info; | 2261 | struct ext4_group_info **meta_group_info; |
2262 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2245 | 2263 | ||
2246 | /* | 2264 | /* |
2247 | * First check if this group is the first of a reserved block. | 2265 | * First check if this group is the first of a reserved block. |
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2261 | meta_group_info; | 2279 | meta_group_info; |
2262 | } | 2280 | } |
2263 | 2281 | ||
2264 | /* | ||
2265 | * calculate needed size. if change bb_counters size, | ||
2266 | * don't forget about ext4_mb_generate_buddy() | ||
2267 | */ | ||
2268 | len = offsetof(typeof(**meta_group_info), | ||
2269 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2270 | |||
2271 | meta_group_info = | 2282 | meta_group_info = |
2272 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2283 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2273 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2284 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2274 | 2285 | ||
2275 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | 2286 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2276 | if (meta_group_info[i] == NULL) { | 2287 | if (meta_group_info[i] == NULL) { |
2277 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | 2288 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); |
2278 | goto exit_group_info; | 2289 | goto exit_group_info; |
2279 | } | 2290 | } |
2291 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2280 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2292 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2281 | &(meta_group_info[i]->bb_state)); | 2293 | &(meta_group_info[i]->bb_state)); |
2282 | 2294 | ||
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2331 | int num_meta_group_infos_max; | 2343 | int num_meta_group_infos_max; |
2332 | int array_size; | 2344 | int array_size; |
2333 | struct ext4_group_desc *desc; | 2345 | struct ext4_group_desc *desc; |
2346 | struct kmem_cache *cachep; | ||
2334 | 2347 | ||
2335 | /* This is the number of blocks used by GDT */ | 2348 | /* This is the number of blocks used by GDT */ |
2336 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2349 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
@@ -2389,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2389 | return 0; | 2402 | return 0; |
2390 | 2403 | ||
2391 | err_freebuddy: | 2404 | err_freebuddy: |
2405 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2392 | while (i-- > 0) | 2406 | while (i-- > 0) |
2393 | kfree(ext4_get_group_info(sb, i)); | 2407 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2394 | i = num_meta_group_infos; | 2408 | i = num_meta_group_infos; |
2395 | while (i-- > 0) | 2409 | while (i-- > 0) |
2396 | kfree(sbi->s_group_info[i]); | 2410 | kfree(sbi->s_group_info[i]); |
@@ -2407,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2407 | unsigned offset; | 2421 | unsigned offset; |
2408 | unsigned max; | 2422 | unsigned max; |
2409 | int ret; | 2423 | int ret; |
2424 | int cache_index; | ||
2425 | struct kmem_cache *cachep; | ||
2426 | char *namep = NULL; | ||
2410 | 2427 | ||
2411 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); | 2428 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2412 | 2429 | ||
2413 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2430 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2414 | if (sbi->s_mb_offsets == NULL) { | 2431 | if (sbi->s_mb_offsets == NULL) { |
2415 | return -ENOMEM; | 2432 | ret = -ENOMEM; |
2433 | goto out; | ||
2416 | } | 2434 | } |
2417 | 2435 | ||
2418 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); | 2436 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2419 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2437 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2420 | if (sbi->s_mb_maxs == NULL) { | 2438 | if (sbi->s_mb_maxs == NULL) { |
2421 | kfree(sbi->s_mb_offsets); | 2439 | ret = -ENOMEM; |
2422 | return -ENOMEM; | 2440 | goto out; |
2441 | } | ||
2442 | |||
2443 | cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2444 | cachep = ext4_groupinfo_caches[cache_index]; | ||
2445 | if (!cachep) { | ||
2446 | char name[32]; | ||
2447 | int len = offsetof(struct ext4_group_info, | ||
2448 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2449 | |||
2450 | sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits); | ||
2451 | namep = kstrdup(name, GFP_KERNEL); | ||
2452 | if (!namep) { | ||
2453 | ret = -ENOMEM; | ||
2454 | goto out; | ||
2455 | } | ||
2456 | |||
2457 | /* Need to free the kmem_cache_name() when we | ||
2458 | * destroy the slab */ | ||
2459 | cachep = kmem_cache_create(namep, len, 0, | ||
2460 | SLAB_RECLAIM_ACCOUNT, NULL); | ||
2461 | if (!cachep) { | ||
2462 | ret = -ENOMEM; | ||
2463 | goto out; | ||
2464 | } | ||
2465 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2423 | } | 2466 | } |
2424 | 2467 | ||
2425 | /* order 0 is regular bitmap */ | 2468 | /* order 0 is regular bitmap */ |
@@ -2440,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2440 | /* init file for buddy data */ | 2483 | /* init file for buddy data */ |
2441 | ret = ext4_mb_init_backend(sb); | 2484 | ret = ext4_mb_init_backend(sb); |
2442 | if (ret != 0) { | 2485 | if (ret != 0) { |
2443 | kfree(sbi->s_mb_offsets); | 2486 | goto out; |
2444 | kfree(sbi->s_mb_maxs); | ||
2445 | return ret; | ||
2446 | } | 2487 | } |
2447 | 2488 | ||
2448 | spin_lock_init(&sbi->s_md_lock); | 2489 | spin_lock_init(&sbi->s_md_lock); |
@@ -2457,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2457 | 2498 | ||
2458 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2499 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2459 | if (sbi->s_locality_groups == NULL) { | 2500 | if (sbi->s_locality_groups == NULL) { |
2460 | kfree(sbi->s_mb_offsets); | 2501 | ret = -ENOMEM; |
2461 | kfree(sbi->s_mb_maxs); | 2502 | goto out; |
2462 | return -ENOMEM; | ||
2463 | } | 2503 | } |
2464 | for_each_possible_cpu(i) { | 2504 | for_each_possible_cpu(i) { |
2465 | struct ext4_locality_group *lg; | 2505 | struct ext4_locality_group *lg; |
@@ -2476,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2476 | 2516 | ||
2477 | if (sbi->s_journal) | 2517 | if (sbi->s_journal) |
2478 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2518 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2479 | return 0; | 2519 | out: |
2520 | if (ret) { | ||
2521 | kfree(sbi->s_mb_offsets); | ||
2522 | kfree(sbi->s_mb_maxs); | ||
2523 | kfree(namep); | ||
2524 | } | ||
2525 | return ret; | ||
2480 | } | 2526 | } |
2481 | 2527 | ||
2482 | /* need to called with the ext4 group lock held */ | 2528 | /* need to called with the ext4 group lock held */ |
@@ -2504,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2504 | int num_meta_group_infos; | 2550 | int num_meta_group_infos; |
2505 | struct ext4_group_info *grinfo; | 2551 | struct ext4_group_info *grinfo; |
2506 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2552 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2553 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2507 | 2554 | ||
2508 | if (sbi->s_group_info) { | 2555 | if (sbi->s_group_info) { |
2509 | for (i = 0; i < ngroups; i++) { | 2556 | for (i = 0; i < ngroups; i++) { |
@@ -2514,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2514 | ext4_lock_group(sb, i); | 2561 | ext4_lock_group(sb, i); |
2515 | ext4_mb_cleanup_pa(grinfo); | 2562 | ext4_mb_cleanup_pa(grinfo); |
2516 | ext4_unlock_group(sb, i); | 2563 | ext4_unlock_group(sb, i); |
2517 | kfree(grinfo); | 2564 | kmem_cache_free(cachep, grinfo); |
2518 | } | 2565 | } |
2519 | num_meta_group_infos = (ngroups + | 2566 | num_meta_group_infos = (ngroups + |
2520 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2567 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
@@ -2558,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2558 | return 0; | 2605 | return 0; |
2559 | } | 2606 | } |
2560 | 2607 | ||
2561 | static inline void ext4_issue_discard(struct super_block *sb, | 2608 | static inline int ext4_issue_discard(struct super_block *sb, |
2562 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2609 | ext4_group_t block_group, ext4_grpblk_t block, int count) |
2563 | { | 2610 | { |
2564 | int ret; | 2611 | int ret; |
@@ -2568,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb, | |||
2568 | trace_ext4_discard_blocks(sb, | 2615 | trace_ext4_discard_blocks(sb, |
2569 | (unsigned long long) discard_block, count); | 2616 | (unsigned long long) discard_block, count); |
2570 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); | 2617 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
2571 | if (ret == EOPNOTSUPP) { | 2618 | if (ret == -EOPNOTSUPP) { |
2572 | ext4_warning(sb, "discard not supported, disabling"); | 2619 | ext4_warning(sb, "discard not supported, disabling"); |
2573 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | 2620 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); |
2574 | } | 2621 | } |
2622 | return ret; | ||
2575 | } | 2623 | } |
2576 | 2624 | ||
2577 | /* | 2625 | /* |
@@ -2659,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void) | |||
2659 | 2707 | ||
2660 | #endif | 2708 | #endif |
2661 | 2709 | ||
2662 | int __init init_ext4_mballoc(void) | 2710 | int __init ext4_init_mballoc(void) |
2663 | { | 2711 | { |
2664 | ext4_pspace_cachep = | 2712 | ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, |
2665 | kmem_cache_create("ext4_prealloc_space", | 2713 | SLAB_RECLAIM_ACCOUNT); |
2666 | sizeof(struct ext4_prealloc_space), | ||
2667 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2668 | if (ext4_pspace_cachep == NULL) | 2714 | if (ext4_pspace_cachep == NULL) |
2669 | return -ENOMEM; | 2715 | return -ENOMEM; |
2670 | 2716 | ||
2671 | ext4_ac_cachep = | 2717 | ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, |
2672 | kmem_cache_create("ext4_alloc_context", | 2718 | SLAB_RECLAIM_ACCOUNT); |
2673 | sizeof(struct ext4_allocation_context), | ||
2674 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2675 | if (ext4_ac_cachep == NULL) { | 2719 | if (ext4_ac_cachep == NULL) { |
2676 | kmem_cache_destroy(ext4_pspace_cachep); | 2720 | kmem_cache_destroy(ext4_pspace_cachep); |
2677 | return -ENOMEM; | 2721 | return -ENOMEM; |
2678 | } | 2722 | } |
2679 | 2723 | ||
2680 | ext4_free_ext_cachep = | 2724 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, |
2681 | kmem_cache_create("ext4_free_block_extents", | 2725 | SLAB_RECLAIM_ACCOUNT); |
2682 | sizeof(struct ext4_free_data), | ||
2683 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2684 | if (ext4_free_ext_cachep == NULL) { | 2726 | if (ext4_free_ext_cachep == NULL) { |
2685 | kmem_cache_destroy(ext4_pspace_cachep); | 2727 | kmem_cache_destroy(ext4_pspace_cachep); |
2686 | kmem_cache_destroy(ext4_ac_cachep); | 2728 | kmem_cache_destroy(ext4_ac_cachep); |
@@ -2690,8 +2732,9 @@ int __init init_ext4_mballoc(void) | |||
2690 | return 0; | 2732 | return 0; |
2691 | } | 2733 | } |
2692 | 2734 | ||
2693 | void exit_ext4_mballoc(void) | 2735 | void ext4_exit_mballoc(void) |
2694 | { | 2736 | { |
2737 | int i; | ||
2695 | /* | 2738 | /* |
2696 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2739 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
2697 | * before destroying the slab cache. | 2740 | * before destroying the slab cache. |
@@ -2700,6 +2743,15 @@ void exit_ext4_mballoc(void) | |||
2700 | kmem_cache_destroy(ext4_pspace_cachep); | 2743 | kmem_cache_destroy(ext4_pspace_cachep); |
2701 | kmem_cache_destroy(ext4_ac_cachep); | 2744 | kmem_cache_destroy(ext4_ac_cachep); |
2702 | kmem_cache_destroy(ext4_free_ext_cachep); | 2745 | kmem_cache_destroy(ext4_free_ext_cachep); |
2746 | |||
2747 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2748 | struct kmem_cache *cachep = ext4_groupinfo_caches[i]; | ||
2749 | if (cachep) { | ||
2750 | char *name = (char *)kmem_cache_name(cachep); | ||
2751 | kmem_cache_destroy(cachep); | ||
2752 | kfree(name); | ||
2753 | } | ||
2754 | } | ||
2703 | ext4_remove_debugfs_entry(); | 2755 | ext4_remove_debugfs_entry(); |
2704 | } | 2756 | } |
2705 | 2757 | ||
@@ -3536,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) | |||
3536 | */ | 3588 | */ |
3537 | static noinline_for_stack int | 3589 | static noinline_for_stack int |
3538 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | 3590 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, |
3539 | struct ext4_prealloc_space *pa, | 3591 | struct ext4_prealloc_space *pa) |
3540 | struct ext4_allocation_context *ac) | ||
3541 | { | 3592 | { |
3542 | struct super_block *sb = e4b->bd_sb; | 3593 | struct super_block *sb = e4b->bd_sb; |
3543 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3594 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -3555,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3555 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3606 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3556 | end = bit + pa->pa_len; | 3607 | end = bit + pa->pa_len; |
3557 | 3608 | ||
3558 | if (ac) { | ||
3559 | ac->ac_sb = sb; | ||
3560 | ac->ac_inode = pa->pa_inode; | ||
3561 | } | ||
3562 | |||
3563 | while (bit < end) { | 3609 | while (bit < end) { |
3564 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); | 3610 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); |
3565 | if (bit >= end) | 3611 | if (bit >= end) |
@@ -3570,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3570 | (unsigned) next - bit, (unsigned) group); | 3616 | (unsigned) next - bit, (unsigned) group); |
3571 | free += next - bit; | 3617 | free += next - bit; |
3572 | 3618 | ||
3573 | if (ac) { | 3619 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3574 | ac->ac_b_ex.fe_group = group; | 3620 | trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, |
3575 | ac->ac_b_ex.fe_start = bit; | 3621 | grp_blk_start + bit, next - bit); |
3576 | ac->ac_b_ex.fe_len = next - bit; | ||
3577 | ac->ac_b_ex.fe_logical = 0; | ||
3578 | trace_ext4_mballoc_discard(ac); | ||
3579 | } | ||
3580 | |||
3581 | trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, | ||
3582 | next - bit); | ||
3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3622 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3584 | bit = next + 1; | 3623 | bit = next + 1; |
3585 | } | 3624 | } |
@@ -3602,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3602 | 3641 | ||
3603 | static noinline_for_stack int | 3642 | static noinline_for_stack int |
3604 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, | 3643 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, |
3605 | struct ext4_prealloc_space *pa, | 3644 | struct ext4_prealloc_space *pa) |
3606 | struct ext4_allocation_context *ac) | ||
3607 | { | 3645 | { |
3608 | struct super_block *sb = e4b->bd_sb; | 3646 | struct super_block *sb = e4b->bd_sb; |
3609 | ext4_group_t group; | 3647 | ext4_group_t group; |
3610 | ext4_grpblk_t bit; | 3648 | ext4_grpblk_t bit; |
3611 | 3649 | ||
3612 | trace_ext4_mb_release_group_pa(sb, ac, pa); | 3650 | trace_ext4_mb_release_group_pa(sb, pa); |
3613 | BUG_ON(pa->pa_deleted == 0); | 3651 | BUG_ON(pa->pa_deleted == 0); |
3614 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3652 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3615 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3653 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3616 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); | 3654 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
3617 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); | 3655 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
3618 | 3656 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); | |
3619 | if (ac) { | ||
3620 | ac->ac_sb = sb; | ||
3621 | ac->ac_inode = NULL; | ||
3622 | ac->ac_b_ex.fe_group = group; | ||
3623 | ac->ac_b_ex.fe_start = bit; | ||
3624 | ac->ac_b_ex.fe_len = pa->pa_len; | ||
3625 | ac->ac_b_ex.fe_logical = 0; | ||
3626 | trace_ext4_mballoc_discard(ac); | ||
3627 | } | ||
3628 | 3657 | ||
3629 | return 0; | 3658 | return 0; |
3630 | } | 3659 | } |
@@ -3645,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3645 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 3674 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
3646 | struct buffer_head *bitmap_bh = NULL; | 3675 | struct buffer_head *bitmap_bh = NULL; |
3647 | struct ext4_prealloc_space *pa, *tmp; | 3676 | struct ext4_prealloc_space *pa, *tmp; |
3648 | struct ext4_allocation_context *ac; | ||
3649 | struct list_head list; | 3677 | struct list_head list; |
3650 | struct ext4_buddy e4b; | 3678 | struct ext4_buddy e4b; |
3651 | int err; | 3679 | int err; |
@@ -3674,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3674 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3702 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; |
3675 | 3703 | ||
3676 | INIT_LIST_HEAD(&list); | 3704 | INIT_LIST_HEAD(&list); |
3677 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3678 | if (ac) | ||
3679 | ac->ac_sb = sb; | ||
3680 | repeat: | 3705 | repeat: |
3681 | ext4_lock_group(sb, group); | 3706 | ext4_lock_group(sb, group); |
3682 | list_for_each_entry_safe(pa, tmp, | 3707 | list_for_each_entry_safe(pa, tmp, |
@@ -3731,9 +3756,9 @@ repeat: | |||
3731 | spin_unlock(pa->pa_obj_lock); | 3756 | spin_unlock(pa->pa_obj_lock); |
3732 | 3757 | ||
3733 | if (pa->pa_type == MB_GROUP_PA) | 3758 | if (pa->pa_type == MB_GROUP_PA) |
3734 | ext4_mb_release_group_pa(&e4b, pa, ac); | 3759 | ext4_mb_release_group_pa(&e4b, pa); |
3735 | else | 3760 | else |
3736 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3761 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3737 | 3762 | ||
3738 | list_del(&pa->u.pa_tmp_list); | 3763 | list_del(&pa->u.pa_tmp_list); |
3739 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3764 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
@@ -3741,8 +3766,6 @@ repeat: | |||
3741 | 3766 | ||
3742 | out: | 3767 | out: |
3743 | ext4_unlock_group(sb, group); | 3768 | ext4_unlock_group(sb, group); |
3744 | if (ac) | ||
3745 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3746 | ext4_mb_unload_buddy(&e4b); | 3769 | ext4_mb_unload_buddy(&e4b); |
3747 | put_bh(bitmap_bh); | 3770 | put_bh(bitmap_bh); |
3748 | return free; | 3771 | return free; |
@@ -3763,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3763 | struct super_block *sb = inode->i_sb; | 3786 | struct super_block *sb = inode->i_sb; |
3764 | struct buffer_head *bitmap_bh = NULL; | 3787 | struct buffer_head *bitmap_bh = NULL; |
3765 | struct ext4_prealloc_space *pa, *tmp; | 3788 | struct ext4_prealloc_space *pa, *tmp; |
3766 | struct ext4_allocation_context *ac; | ||
3767 | ext4_group_t group = 0; | 3789 | ext4_group_t group = 0; |
3768 | struct list_head list; | 3790 | struct list_head list; |
3769 | struct ext4_buddy e4b; | 3791 | struct ext4_buddy e4b; |
@@ -3779,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3779 | 3801 | ||
3780 | INIT_LIST_HEAD(&list); | 3802 | INIT_LIST_HEAD(&list); |
3781 | 3803 | ||
3782 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3783 | if (ac) { | ||
3784 | ac->ac_sb = sb; | ||
3785 | ac->ac_inode = inode; | ||
3786 | } | ||
3787 | repeat: | 3804 | repeat: |
3788 | /* first, collect all pa's in the inode */ | 3805 | /* first, collect all pa's in the inode */ |
3789 | spin_lock(&ei->i_prealloc_lock); | 3806 | spin_lock(&ei->i_prealloc_lock); |
@@ -3853,7 +3870,7 @@ repeat: | |||
3853 | 3870 | ||
3854 | ext4_lock_group(sb, group); | 3871 | ext4_lock_group(sb, group); |
3855 | list_del(&pa->pa_group_list); | 3872 | list_del(&pa->pa_group_list); |
3856 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3873 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3857 | ext4_unlock_group(sb, group); | 3874 | ext4_unlock_group(sb, group); |
3858 | 3875 | ||
3859 | ext4_mb_unload_buddy(&e4b); | 3876 | ext4_mb_unload_buddy(&e4b); |
@@ -3862,8 +3879,6 @@ repeat: | |||
3862 | list_del(&pa->u.pa_tmp_list); | 3879 | list_del(&pa->u.pa_tmp_list); |
3863 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3880 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
3864 | } | 3881 | } |
3865 | if (ac) | ||
3866 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3867 | } | 3882 | } |
3868 | 3883 | ||
3869 | /* | 3884 | /* |
@@ -4061,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4061 | struct ext4_buddy e4b; | 4076 | struct ext4_buddy e4b; |
4062 | struct list_head discard_list; | 4077 | struct list_head discard_list; |
4063 | struct ext4_prealloc_space *pa, *tmp; | 4078 | struct ext4_prealloc_space *pa, *tmp; |
4064 | struct ext4_allocation_context *ac; | ||
4065 | 4079 | ||
4066 | mb_debug(1, "discard locality group preallocation\n"); | 4080 | mb_debug(1, "discard locality group preallocation\n"); |
4067 | 4081 | ||
4068 | INIT_LIST_HEAD(&discard_list); | 4082 | INIT_LIST_HEAD(&discard_list); |
4069 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4070 | if (ac) | ||
4071 | ac->ac_sb = sb; | ||
4072 | 4083 | ||
4073 | spin_lock(&lg->lg_prealloc_lock); | 4084 | spin_lock(&lg->lg_prealloc_lock); |
4074 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], | 4085 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
@@ -4120,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4120 | } | 4131 | } |
4121 | ext4_lock_group(sb, group); | 4132 | ext4_lock_group(sb, group); |
4122 | list_del(&pa->pa_group_list); | 4133 | list_del(&pa->pa_group_list); |
4123 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4134 | ext4_mb_release_group_pa(&e4b, pa); |
4124 | ext4_unlock_group(sb, group); | 4135 | ext4_unlock_group(sb, group); |
4125 | 4136 | ||
4126 | ext4_mb_unload_buddy(&e4b); | 4137 | ext4_mb_unload_buddy(&e4b); |
4127 | list_del(&pa->u.pa_tmp_list); | 4138 | list_del(&pa->u.pa_tmp_list); |
4128 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4139 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4129 | } | 4140 | } |
4130 | if (ac) | ||
4131 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4132 | } | 4141 | } |
4133 | 4142 | ||
4134 | /* | 4143 | /* |
@@ -4492,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4492 | { | 4501 | { |
4493 | struct buffer_head *bitmap_bh = NULL; | 4502 | struct buffer_head *bitmap_bh = NULL; |
4494 | struct super_block *sb = inode->i_sb; | 4503 | struct super_block *sb = inode->i_sb; |
4495 | struct ext4_allocation_context *ac = NULL; | ||
4496 | struct ext4_group_desc *gdp; | 4504 | struct ext4_group_desc *gdp; |
4497 | unsigned long freed = 0; | 4505 | unsigned long freed = 0; |
4498 | unsigned int overflow; | 4506 | unsigned int overflow; |
@@ -4532,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4532 | if (!bh) | 4540 | if (!bh) |
4533 | tbh = sb_find_get_block(inode->i_sb, | 4541 | tbh = sb_find_get_block(inode->i_sb, |
4534 | block + i); | 4542 | block + i); |
4543 | if (unlikely(!tbh)) | ||
4544 | continue; | ||
4535 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4545 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4536 | inode, tbh, block + i); | 4546 | inode, tbh, block + i); |
4537 | } | 4547 | } |
@@ -4547,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4547 | if (!ext4_should_writeback_data(inode)) | 4557 | if (!ext4_should_writeback_data(inode)) |
4548 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4558 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4549 | 4559 | ||
4550 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4551 | if (ac) { | ||
4552 | ac->ac_inode = inode; | ||
4553 | ac->ac_sb = sb; | ||
4554 | } | ||
4555 | |||
4556 | do_more: | 4560 | do_more: |
4557 | overflow = 0; | 4561 | overflow = 0; |
4558 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4562 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -4610,12 +4614,7 @@ do_more: | |||
4610 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4614 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4611 | } | 4615 | } |
4612 | #endif | 4616 | #endif |
4613 | if (ac) { | 4617 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); |
4614 | ac->ac_b_ex.fe_group = block_group; | ||
4615 | ac->ac_b_ex.fe_start = bit; | ||
4616 | ac->ac_b_ex.fe_len = count; | ||
4617 | trace_ext4_mballoc_free(ac); | ||
4618 | } | ||
4619 | 4618 | ||
4620 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4621 | if (err) | 4620 | if (err) |
@@ -4641,12 +4640,12 @@ do_more: | |||
4641 | * with group lock held. generate_buddy look at | 4640 | * with group lock held. generate_buddy look at |
4642 | * them with group lock_held | 4641 | * them with group lock_held |
4643 | */ | 4642 | */ |
4643 | if (test_opt(sb, DISCARD)) | ||
4644 | ext4_issue_discard(sb, block_group, bit, count); | ||
4644 | ext4_lock_group(sb, block_group); | 4645 | ext4_lock_group(sb, block_group); |
4645 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4646 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4646 | mb_free_blocks(inode, &e4b, bit, count); | 4647 | mb_free_blocks(inode, &e4b, bit, count); |
4647 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4648 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
4648 | if (test_opt(sb, DISCARD)) | ||
4649 | ext4_issue_discard(sb, block_group, bit, count); | ||
4650 | } | 4649 | } |
4651 | 4650 | ||
4652 | ret = ext4_free_blks_count(sb, gdp) + count; | 4651 | ret = ext4_free_blks_count(sb, gdp) + count; |
@@ -4686,7 +4685,190 @@ error_return: | |||
4686 | dquot_free_block(inode, freed); | 4685 | dquot_free_block(inode, freed); |
4687 | brelse(bitmap_bh); | 4686 | brelse(bitmap_bh); |
4688 | ext4_std_error(sb, err); | 4687 | ext4_std_error(sb, err); |
4689 | if (ac) | ||
4690 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4691 | return; | 4688 | return; |
4692 | } | 4689 | } |
4690 | |||
4691 | /** | ||
4692 | * ext4_trim_extent -- function to TRIM one single free extent in the group | ||
4693 | * @sb: super block for the file system | ||
4694 | * @start: starting block of the free extent in the alloc. group | ||
4695 | * @count: number of blocks to TRIM | ||
4696 | * @group: alloc. group we are working with | ||
4697 | * @e4b: ext4 buddy for the group | ||
4698 | * | ||
4699 | * Trim "count" blocks starting at "start" in the "group". To ensure that no | ||
4700 | * one will allocate those blocks, mark them as used in the buddy bitmap. This | ||
4701 | * must be called with the group lock held. | ||
4702 | */ | ||
4703 | static int ext4_trim_extent(struct super_block *sb, int start, int count, | ||
4704 | ext4_group_t group, struct ext4_buddy *e4b) | ||
4705 | { | ||
4706 | struct ext4_free_extent ex; | ||
4707 | int ret = 0; | ||
4708 | |||
4709 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | ||
4710 | |||
4711 | ex.fe_start = start; | ||
4712 | ex.fe_group = group; | ||
4713 | ex.fe_len = count; | ||
4714 | |||
4715 | /* | ||
4716 | * Mark blocks used, so no one can reuse them while | ||
4717 | * being trimmed. | ||
4718 | */ | ||
4719 | mb_mark_used(e4b, &ex); | ||
4720 | ext4_unlock_group(sb, group); | ||
4721 | |||
4722 | ret = ext4_issue_discard(sb, group, start, count); | ||
4723 | if (ret) | ||
4724 | ext4_std_error(sb, ret); | ||
4725 | |||
4726 | ext4_lock_group(sb, group); | ||
4727 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | ||
4728 | return ret; | ||
4729 | } | ||
4730 | |||
4731 | /** | ||
4732 | * ext4_trim_all_free -- function to trim all free space in alloc. group | ||
4733 | * @sb: super block for file system | ||
4734 | * @e4b: ext4 buddy | ||
4735 | * @start: first group block to examine | ||
4736 | * @max: last group block to examine | ||
4737 | * @minblocks: minimum extent block count | ||
4738 | * | ||
4739 | * ext4_trim_all_free walks through group's buddy bitmap searching for free | ||
4740 | * extents. When the free block is found, ext4_trim_extent is called to TRIM | ||
4741 | * the extent. | ||
4742 | * | ||
4743 | * | ||
4744 | * ext4_trim_all_free walks through the group's block bitmap searching for free | ||
4745 | * extents. When a free extent is found, it is marked as used in the group buddy | ||
4746 | * bitmap. Then a TRIM command is issued on this extent and the extent is freed | ||
4747 | * in the group buddy bitmap. This is done until the whole group is scanned. | ||
4748 | */ | ||
4749 | ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | ||
4750 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | ||
4751 | { | ||
4752 | void *bitmap; | ||
4753 | ext4_grpblk_t next, count = 0; | ||
4754 | ext4_group_t group; | ||
4755 | int ret = 0; | ||
4756 | |||
4757 | BUG_ON(e4b == NULL); | ||
4758 | |||
4759 | bitmap = e4b->bd_bitmap; | ||
4760 | group = e4b->bd_group; | ||
4761 | start = (e4b->bd_info->bb_first_free > start) ? | ||
4762 | e4b->bd_info->bb_first_free : start; | ||
4763 | ext4_lock_group(sb, group); | ||
4764 | |||
4765 | while (start < max) { | ||
4766 | start = mb_find_next_zero_bit(bitmap, max, start); | ||
4767 | if (start >= max) | ||
4768 | break; | ||
4769 | next = mb_find_next_bit(bitmap, max, start); | ||
4770 | |||
4771 | if ((next - start) >= minblocks) { | ||
4772 | ret = ext4_trim_extent(sb, start, | ||
4773 | next - start, group, e4b); | ||
4774 | if (ret < 0) | ||
4775 | break; | ||
4776 | count += next - start; | ||
4777 | } | ||
4778 | start = next + 1; | ||
4779 | |||
4780 | if (fatal_signal_pending(current)) { | ||
4781 | count = -ERESTARTSYS; | ||
4782 | break; | ||
4783 | } | ||
4784 | |||
4785 | if (need_resched()) { | ||
4786 | ext4_unlock_group(sb, group); | ||
4787 | cond_resched(); | ||
4788 | ext4_lock_group(sb, group); | ||
4789 | } | ||
4790 | |||
4791 | if ((e4b->bd_info->bb_free - count) < minblocks) | ||
4792 | break; | ||
4793 | } | ||
4794 | ext4_unlock_group(sb, group); | ||
4795 | |||
4796 | ext4_debug("trimmed %d blocks in the group %d\n", | ||
4797 | count, group); | ||
4798 | |||
4799 | if (ret < 0) | ||
4800 | count = ret; | ||
4801 | |||
4802 | return count; | ||
4803 | } | ||
4804 | |||
4805 | /** | ||
4806 | * ext4_trim_fs() -- trim ioctl handle function | ||
4807 | * @sb: superblock for filesystem | ||
4808 | * @range: fstrim_range structure | ||
4809 | * | ||
4810 | * start: First Byte to trim | ||
4811 | * len: number of Bytes to trim from start | ||
4812 | * minlen: minimum extent length in Bytes | ||
4813 | * ext4_trim_fs goes through all allocation groups containing Bytes from | ||
4814 | * start to start+len. For each such group, the ext4_trim_all_free function | ||
4815 | * is invoked to trim all free space. | ||
4816 | */ | ||
4817 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
4818 | { | ||
4819 | struct ext4_buddy e4b; | ||
4820 | ext4_group_t first_group, last_group; | ||
4821 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
4822 | ext4_grpblk_t cnt = 0, first_block, last_block; | ||
4823 | uint64_t start, len, minlen, trimmed; | ||
4824 | int ret = 0; | ||
4825 | |||
4826 | start = range->start >> sb->s_blocksize_bits; | ||
4827 | len = range->len >> sb->s_blocksize_bits; | ||
4828 | minlen = range->minlen >> sb->s_blocksize_bits; | ||
4829 | trimmed = 0; | ||
4830 | |||
4831 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | ||
4832 | return -EINVAL; | ||
4833 | |||
4834 | /* Determine first and last group to examine based on start and len */ | ||
4835 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | ||
4836 | &first_group, &first_block); | ||
4837 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | ||
4838 | &last_group, &last_block); | ||
4839 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
4840 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | ||
4841 | |||
4842 | if (first_group > last_group) | ||
4843 | return -EINVAL; | ||
4844 | |||
4845 | for (group = first_group; group <= last_group; group++) { | ||
4846 | ret = ext4_mb_load_buddy(sb, group, &e4b); | ||
4847 | if (ret) { | ||
4848 | ext4_error(sb, "Error in loading buddy " | ||
4849 | "information for %u", group); | ||
4850 | break; | ||
4851 | } | ||
4852 | |||
4853 | if (len >= EXT4_BLOCKS_PER_GROUP(sb)) | ||
4854 | len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); | ||
4855 | else | ||
4856 | last_block = len; | ||
4857 | |||
4858 | if (e4b.bd_info->bb_free >= minlen) { | ||
4859 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | ||
4860 | last_block, minlen); | ||
4861 | if (cnt < 0) { | ||
4862 | ret = cnt; | ||
4863 | ext4_mb_unload_buddy(&e4b); | ||
4864 | break; | ||
4865 | } | ||
4866 | } | ||
4867 | ext4_mb_unload_buddy(&e4b); | ||
4868 | trimmed += cnt; | ||
4869 | first_block = 0; | ||
4870 | } | ||
4871 | range->len = trimmed * sb->s_blocksize; | ||
4872 | |||
4873 | return ret; | ||
4874 | } | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 1765c2c50a9b..25f3a974b725 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
412 | struct buffer_head *bh; | 412 | struct buffer_head *bh; |
413 | struct ext4_extent_header *eh; | 413 | struct ext4_extent_header *eh; |
414 | 414 | ||
415 | block = idx_pblock(ix); | 415 | block = ext4_idx_pblock(ix); |
416 | bh = sb_bread(inode->i_sb, block); | 416 | bh = sb_bread(inode->i_sb, block); |
417 | if (!bh) | 417 | if (!bh) |
418 | return -EIO; | 418 | return -EIO; |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 5f1ed9fc913c..b9f3e7862f13 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | 85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { |
86 | /* leaf block */ | 86 | /* leaf block */ |
87 | *extent = ++path[ppos].p_ext; | 87 | *extent = ++path[ppos].p_ext; |
88 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | 88 | path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
96 | 96 | ||
97 | /* index block */ | 97 | /* index block */ |
98 | path[ppos].p_idx++; | 98 | path[ppos].p_idx++; |
99 | path[ppos].p_block = idx_pblock(path[ppos].p_idx); | 99 | path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); |
100 | if (path[ppos+1].p_bh) | 100 | if (path[ppos+1].p_bh) |
101 | brelse(path[ppos+1].p_bh); | 101 | brelse(path[ppos+1].p_bh); |
102 | path[ppos+1].p_bh = | 102 | path[ppos+1].p_bh = |
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
111 | path[cur_ppos].p_idx = | 111 | path[cur_ppos].p_idx = |
112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); | 112 | EXT_FIRST_INDEX(path[cur_ppos].p_hdr); |
113 | path[cur_ppos].p_block = | 113 | path[cur_ppos].p_block = |
114 | idx_pblock(path[cur_ppos].p_idx); | 114 | ext4_idx_pblock(path[cur_ppos].p_idx); |
115 | if (path[cur_ppos+1].p_bh) | 115 | if (path[cur_ppos+1].p_bh) |
116 | brelse(path[cur_ppos+1].p_bh); | 116 | brelse(path[cur_ppos+1].p_bh); |
117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, | 117 | path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, |
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
133 | path[leaf_ppos].p_ext = *extent = | 133 | path[leaf_ppos].p_ext = *extent = |
134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | 134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); |
135 | path[leaf_ppos].p_block = | 135 | path[leaf_ppos].p_block = |
136 | ext_pblock(path[leaf_ppos].p_ext); | 136 | ext4_ext_pblock(path[leaf_ppos].p_ext); |
137 | return 0; | 137 | return 0; |
138 | } | 138 | } |
139 | } | 139 | } |
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
249 | */ | 249 | */ |
250 | o_end->ee_block = end_ext->ee_block; | 250 | o_end->ee_block = end_ext->ee_block; |
251 | o_end->ee_len = end_ext->ee_len; | 251 | o_end->ee_len = end_ext->ee_len; |
252 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 252 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
253 | } | 253 | } |
254 | 254 | ||
255 | o_start->ee_len = start_ext->ee_len; | 255 | o_start->ee_len = start_ext->ee_len; |
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
276 | */ | 276 | */ |
277 | o_end->ee_block = end_ext->ee_block; | 277 | o_end->ee_block = end_ext->ee_block; |
278 | o_end->ee_len = end_ext->ee_len; | 278 | o_end->ee_len = end_ext->ee_len; |
279 | ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); | 279 | ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); |
280 | 280 | ||
281 | /* | 281 | /* |
282 | * Set 0 to the extent block if new_ext was | 282 | * Set 0 to the extent block if new_ext was |
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start, | |||
361 | /* Insert new entry */ | 361 | /* Insert new entry */ |
362 | if (new_ext->ee_len) { | 362 | if (new_ext->ee_len) { |
363 | o_start[i] = *new_ext; | 363 | o_start[i] = *new_ext; |
364 | ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); | 364 | ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); |
365 | } | 365 | } |
366 | 366 | ||
367 | /* Insert end entry */ | 367 | /* Insert end entry */ |
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
488 | start_ext.ee_len = end_ext.ee_len = 0; | 488 | start_ext.ee_len = end_ext.ee_len = 0; |
489 | 489 | ||
490 | new_ext.ee_block = cpu_to_le32(*from); | 490 | new_ext.ee_block = cpu_to_le32(*from); |
491 | ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); | 491 | ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); |
492 | new_ext.ee_len = dext->ee_len; | 492 | new_ext.ee_len = dext->ee_len; |
493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | 493 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); |
494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | 494 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; |
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
553 | copy_extent_status(oext, &end_ext); | 553 | copy_extent_status(oext, &end_ext); |
554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); | 554 | end_ext_alen = ext4_ext_get_actual_len(&end_ext); |
555 | ext4_ext_store_pblock(&end_ext, | 555 | ext4_ext_store_pblock(&end_ext, |
556 | (ext_pblock(o_end) + oext_alen - end_ext_alen)); | 556 | (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); |
557 | end_ext.ee_block = | 557 | end_ext.ee_block = |
558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + | 558 | cpu_to_le32(le32_to_cpu(o_end->ee_block) + |
559 | oext_alen - end_ext_alen); | 559 | oext_alen - end_ext_alen); |
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
604 | /* When tmp_dext is too large, pick up the target range. */ | 604 | /* When tmp_dext is too large, pick up the target range. */ |
605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); | 605 | diff = donor_off - le32_to_cpu(tmp_dext->ee_block); |
606 | 606 | ||
607 | ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); | 607 | ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); |
608 | tmp_dext->ee_block = | 608 | tmp_dext->ee_block = |
609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); | 609 | cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); |
610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); | 610 | tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); |
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
613 | tmp_dext->ee_len = cpu_to_le16(max_count); | 613 | tmp_dext->ee_len = cpu_to_le16(max_count); |
614 | 614 | ||
615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); | 615 | orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); |
616 | ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); | 616 | ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); |
617 | 617 | ||
618 | /* Adjust extent length if donor extent is larger than orig */ | 618 | /* Adjust extent length if donor extent is larger than orig */ |
619 | if (ext4_ext_get_actual_len(tmp_dext) > | 619 | if (ext4_ext_get_actual_len(tmp_dext) > |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index bd39885b5998..92203b8a099f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
856 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; | 856 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
857 | struct buffer_head *bh, *ret = NULL; | 857 | struct buffer_head *bh, *ret = NULL; |
858 | ext4_lblk_t start, block, b; | 858 | ext4_lblk_t start, block, b; |
859 | const u8 *name = d_name->name; | ||
859 | int ra_max = 0; /* Number of bh's in the readahead | 860 | int ra_max = 0; /* Number of bh's in the readahead |
860 | buffer, bh_use[] */ | 861 | buffer, bh_use[] */ |
861 | int ra_ptr = 0; /* Current index into readahead | 862 | int ra_ptr = 0; /* Current index into readahead |
@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
870 | namelen = d_name->len; | 871 | namelen = d_name->len; |
871 | if (namelen > EXT4_NAME_LEN) | 872 | if (namelen > EXT4_NAME_LEN) |
872 | return NULL; | 873 | return NULL; |
874 | if ((namelen <= 2) && (name[0] == '.') && | ||
875 | (name[1] == '.' || name[1] == '0')) { | ||
876 | /* | ||
877 | * "." or ".." will only be in the first block | ||
878 | * NFS may look up ".."; "." should be handled by the VFS | ||
879 | */ | ||
880 | block = start = 0; | ||
881 | nblocks = 1; | ||
882 | goto restart; | ||
883 | } | ||
873 | if (is_dx(dir)) { | 884 | if (is_dx(dir)) { |
874 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); | 885 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
875 | /* | 886 | /* |
@@ -960,55 +971,35 @@ cleanup_and_exit: | |||
960 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, | 971 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
961 | struct ext4_dir_entry_2 **res_dir, int *err) | 972 | struct ext4_dir_entry_2 **res_dir, int *err) |
962 | { | 973 | { |
963 | struct super_block * sb; | 974 | struct super_block * sb = dir->i_sb; |
964 | struct dx_hash_info hinfo; | 975 | struct dx_hash_info hinfo; |
965 | u32 hash; | ||
966 | struct dx_frame frames[2], *frame; | 976 | struct dx_frame frames[2], *frame; |
967 | struct ext4_dir_entry_2 *de, *top; | ||
968 | struct buffer_head *bh; | 977 | struct buffer_head *bh; |
969 | ext4_lblk_t block; | 978 | ext4_lblk_t block; |
970 | int retval; | 979 | int retval; |
971 | int namelen = d_name->len; | ||
972 | const u8 *name = d_name->name; | ||
973 | 980 | ||
974 | sb = dir->i_sb; | 981 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
975 | /* NFS may look up ".." - look at dx_root directory block */ | 982 | return NULL; |
976 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | ||
977 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) | ||
978 | return NULL; | ||
979 | } else { | ||
980 | frame = frames; | ||
981 | frame->bh = NULL; /* for dx_release() */ | ||
982 | frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ | ||
983 | dx_set_block(frame->at, 0); /* dx_root block is 0 */ | ||
984 | } | ||
985 | hash = hinfo.hash; | ||
986 | do { | 983 | do { |
987 | block = dx_get_block(frame->at); | 984 | block = dx_get_block(frame->at); |
988 | if (!(bh = ext4_bread (NULL,dir, block, 0, err))) | 985 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) |
989 | goto errout; | 986 | goto errout; |
990 | de = (struct ext4_dir_entry_2 *) bh->b_data; | ||
991 | top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - | ||
992 | EXT4_DIR_REC_LEN(0)); | ||
993 | for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) { | ||
994 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) | ||
995 | + ((char *) de - bh->b_data); | ||
996 | |||
997 | if (!ext4_check_dir_entry(dir, de, bh, off)) { | ||
998 | brelse(bh); | ||
999 | *err = ERR_BAD_DX_DIR; | ||
1000 | goto errout; | ||
1001 | } | ||
1002 | 987 | ||
1003 | if (ext4_match(namelen, name, de)) { | 988 | retval = search_dirblock(bh, dir, d_name, |
1004 | *res_dir = de; | 989 | block << EXT4_BLOCK_SIZE_BITS(sb), |
1005 | dx_release(frames); | 990 | res_dir); |
1006 | return bh; | 991 | if (retval == 1) { /* Success! */ |
1007 | } | 992 | dx_release(frames); |
993 | return bh; | ||
1008 | } | 994 | } |
1009 | brelse(bh); | 995 | brelse(bh); |
996 | if (retval == -1) { | ||
997 | *err = ERR_BAD_DX_DIR; | ||
998 | goto errout; | ||
999 | } | ||
1000 | |||
1010 | /* Check to see if we should continue to search */ | 1001 | /* Check to see if we should continue to search */ |
1011 | retval = ext4_htree_next_block(dir, hash, frame, | 1002 | retval = ext4_htree_next_block(dir, hinfo.hash, frame, |
1012 | frames, NULL); | 1003 | frames, NULL); |
1013 | if (retval < 0) { | 1004 | if (retval < 0) { |
1014 | ext4_warning(sb, | 1005 | ext4_warning(sb, |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c new file mode 100644 index 000000000000..46a7d6a9d976 --- /dev/null +++ b/fs/ext4/page-io.c | |||
@@ -0,0 +1,430 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/page-io.c | ||
3 | * | ||
4 | * This contains the new page_io functions for ext4 | ||
5 | * | ||
6 | * Written by Theodore Ts'o, 2010. | ||
7 | */ | ||
8 | |||
9 | #include <linux/module.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/time.h> | ||
12 | #include <linux/jbd2.h> | ||
13 | #include <linux/highuid.h> | ||
14 | #include <linux/pagemap.h> | ||
15 | #include <linux/quotaops.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/buffer_head.h> | ||
18 | #include <linux/writeback.h> | ||
19 | #include <linux/pagevec.h> | ||
20 | #include <linux/mpage.h> | ||
21 | #include <linux/namei.h> | ||
22 | #include <linux/uio.h> | ||
23 | #include <linux/bio.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/slab.h> | ||
27 | |||
28 | #include "ext4_jbd2.h" | ||
29 | #include "xattr.h" | ||
30 | #include "acl.h" | ||
31 | #include "ext4_extents.h" | ||
32 | |||
33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | ||
34 | |||
35 | int __init ext4_init_pageio(void) | ||
36 | { | ||
37 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | ||
38 | if (io_page_cachep == NULL) | ||
39 | return -ENOMEM; | ||
40 | io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); | ||
41 | if (io_page_cachep == NULL) { | ||
42 | kmem_cache_destroy(io_page_cachep); | ||
43 | return -ENOMEM; | ||
44 | } | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | void ext4_exit_pageio(void) | ||
50 | { | ||
51 | kmem_cache_destroy(io_end_cachep); | ||
52 | kmem_cache_destroy(io_page_cachep); | ||
53 | } | ||
54 | |||
55 | void ext4_free_io_end(ext4_io_end_t *io) | ||
56 | { | ||
57 | int i; | ||
58 | |||
59 | BUG_ON(!io); | ||
60 | if (io->page) | ||
61 | put_page(io->page); | ||
62 | for (i = 0; i < io->num_io_pages; i++) { | ||
63 | if (--io->pages[i]->p_count == 0) { | ||
64 | struct page *page = io->pages[i]->p_page; | ||
65 | |||
66 | end_page_writeback(page); | ||
67 | put_page(page); | ||
68 | kmem_cache_free(io_page_cachep, io->pages[i]); | ||
69 | } | ||
70 | } | ||
71 | io->num_io_pages = 0; | ||
72 | iput(io->inode); | ||
73 | kmem_cache_free(io_end_cachep, io); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * check a range of space and convert unwritten extents to written. | ||
78 | */ | ||
79 | int ext4_end_io_nolock(ext4_io_end_t *io) | ||
80 | { | ||
81 | struct inode *inode = io->inode; | ||
82 | loff_t offset = io->offset; | ||
83 | ssize_t size = io->size; | ||
84 | int ret = 0; | ||
85 | |||
86 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
87 | "list->prev 0x%p\n", | ||
88 | io, inode->i_ino, io->list.next, io->list.prev); | ||
89 | |||
90 | if (list_empty(&io->list)) | ||
91 | return ret; | ||
92 | |||
93 | if (!(io->flag & EXT4_IO_END_UNWRITTEN)) | ||
94 | return ret; | ||
95 | |||
96 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
97 | if (ret < 0) { | ||
98 | printk(KERN_EMERG "%s: failed to convert unwritten " | ||
99 | "extents to written extents, error is %d " | ||
100 | "io is still on inode %lu aio dio list\n", | ||
101 | __func__, ret, inode->i_ino); | ||
102 | return ret; | ||
103 | } | ||
104 | |||
105 | if (io->iocb) | ||
106 | aio_complete(io->iocb, io->result, 0); | ||
107 | /* clear the DIO AIO unwritten flag */ | ||
108 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
109 | return ret; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
114 | */ | ||
115 | static void ext4_end_io_work(struct work_struct *work) | ||
116 | { | ||
117 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
118 | struct inode *inode = io->inode; | ||
119 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
120 | unsigned long flags; | ||
121 | int ret; | ||
122 | |||
123 | mutex_lock(&inode->i_mutex); | ||
124 | ret = ext4_end_io_nolock(io); | ||
125 | if (ret < 0) { | ||
126 | mutex_unlock(&inode->i_mutex); | ||
127 | return; | ||
128 | } | ||
129 | |||
130 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
131 | if (!list_empty(&io->list)) | ||
132 | list_del_init(&io->list); | ||
133 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
134 | mutex_unlock(&inode->i_mutex); | ||
135 | ext4_free_io_end(io); | ||
136 | } | ||
137 | |||
138 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | ||
139 | { | ||
140 | ext4_io_end_t *io = NULL; | ||
141 | |||
142 | io = kmem_cache_alloc(io_end_cachep, flags); | ||
143 | if (io) { | ||
144 | memset(io, 0, sizeof(*io)); | ||
145 | io->inode = igrab(inode); | ||
146 | BUG_ON(!io->inode); | ||
147 | INIT_WORK(&io->work, ext4_end_io_work); | ||
148 | INIT_LIST_HEAD(&io->list); | ||
149 | } | ||
150 | return io; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Print an buffer I/O error compatible with the fs/buffer.c. This | ||
155 | * provides compatibility with dmesg scrapers that look for a specific | ||
156 | * buffer I/O error message. We really need a unified error reporting | ||
157 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
158 | * probably not going to happen in my lifetime, due to LKML politics... | ||
159 | */ | ||
160 | static void buffer_io_error(struct buffer_head *bh) | ||
161 | { | ||
162 | char b[BDEVNAME_SIZE]; | ||
163 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
164 | bdevname(bh->b_bdev, b), | ||
165 | (unsigned long long)bh->b_blocknr); | ||
166 | } | ||
167 | |||
168 | static void ext4_end_bio(struct bio *bio, int error) | ||
169 | { | ||
170 | ext4_io_end_t *io_end = bio->bi_private; | ||
171 | struct workqueue_struct *wq; | ||
172 | struct inode *inode; | ||
173 | unsigned long flags; | ||
174 | ext4_fsblk_t err_block; | ||
175 | int i; | ||
176 | |||
177 | BUG_ON(!io_end); | ||
178 | inode = io_end->inode; | ||
179 | bio->bi_private = NULL; | ||
180 | bio->bi_end_io = NULL; | ||
181 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
182 | error = 0; | ||
183 | err_block = bio->bi_sector >> (inode->i_blkbits - 9); | ||
184 | bio_put(bio); | ||
185 | |||
186 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) { | ||
187 | pr_err("sb umounted, discard end_io request for inode %lu\n", | ||
188 | io_end->inode->i_ino); | ||
189 | ext4_free_io_end(io_end); | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | if (error) { | ||
194 | io_end->flag |= EXT4_IO_END_ERROR; | ||
195 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | ||
196 | "(offset %llu size %ld starting block %llu)", | ||
197 | inode->i_ino, | ||
198 | (unsigned long long) io_end->offset, | ||
199 | (long) io_end->size, | ||
200 | (unsigned long long) err_block); | ||
201 | } | ||
202 | |||
203 | for (i = 0; i < io_end->num_io_pages; i++) { | ||
204 | struct page *page = io_end->pages[i]->p_page; | ||
205 | struct buffer_head *bh, *head; | ||
206 | int partial_write = 0; | ||
207 | |||
208 | head = page_buffers(page); | ||
209 | if (error) | ||
210 | SetPageError(page); | ||
211 | BUG_ON(!head); | ||
212 | if (head->b_size == PAGE_CACHE_SIZE) | ||
213 | clear_buffer_dirty(head); | ||
214 | else { | ||
215 | loff_t offset; | ||
216 | loff_t io_end_offset = io_end->offset + io_end->size; | ||
217 | |||
218 | offset = (sector_t) page->index << PAGE_CACHE_SHIFT; | ||
219 | bh = head; | ||
220 | do { | ||
221 | if ((offset >= io_end->offset) && | ||
222 | (offset+bh->b_size <= io_end_offset)) { | ||
223 | if (error) | ||
224 | buffer_io_error(bh); | ||
225 | |||
226 | clear_buffer_dirty(bh); | ||
227 | } | ||
228 | if (buffer_delay(bh)) | ||
229 | partial_write = 1; | ||
230 | else if (!buffer_mapped(bh)) | ||
231 | clear_buffer_dirty(bh); | ||
232 | else if (buffer_dirty(bh)) | ||
233 | partial_write = 1; | ||
234 | offset += bh->b_size; | ||
235 | bh = bh->b_this_page; | ||
236 | } while (bh != head); | ||
237 | } | ||
238 | |||
239 | if (--io_end->pages[i]->p_count == 0) { | ||
240 | struct page *page = io_end->pages[i]->p_page; | ||
241 | |||
242 | end_page_writeback(page); | ||
243 | put_page(page); | ||
244 | kmem_cache_free(io_page_cachep, io_end->pages[i]); | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * If this is a partial write which happened to make | ||
249 | * all buffers uptodate then we can optimize away a | ||
250 | * bogus readpage() for the next read(). Here we | ||
251 | * 'discover' whether the page went uptodate as a | ||
252 | * result of this (potentially partial) write. | ||
253 | */ | ||
254 | if (!partial_write) | ||
255 | SetPageUptodate(page); | ||
256 | } | ||
257 | |||
258 | io_end->num_io_pages = 0; | ||
259 | |||
260 | /* Add the io_end to per-inode completed io list*/ | ||
261 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
262 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
263 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
264 | |||
265 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
266 | /* queue the work to convert unwritten extents to written */ | ||
267 | queue_work(wq, &io_end->work); | ||
268 | } | ||
269 | |||
270 | void ext4_io_submit(struct ext4_io_submit *io) | ||
271 | { | ||
272 | struct bio *bio = io->io_bio; | ||
273 | |||
274 | if (bio) { | ||
275 | bio_get(io->io_bio); | ||
276 | submit_bio(io->io_op, io->io_bio); | ||
277 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); | ||
278 | bio_put(io->io_bio); | ||
279 | } | ||
280 | io->io_bio = 0; | ||
281 | io->io_op = 0; | ||
282 | io->io_end = 0; | ||
283 | } | ||
284 | |||
285 | static int io_submit_init(struct ext4_io_submit *io, | ||
286 | struct inode *inode, | ||
287 | struct writeback_control *wbc, | ||
288 | struct buffer_head *bh) | ||
289 | { | ||
290 | ext4_io_end_t *io_end; | ||
291 | struct page *page = bh->b_page; | ||
292 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | ||
293 | struct bio *bio; | ||
294 | |||
295 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
296 | if (!io_end) | ||
297 | return -ENOMEM; | ||
298 | do { | ||
299 | bio = bio_alloc(GFP_NOIO, nvecs); | ||
300 | nvecs >>= 1; | ||
301 | } while (bio == NULL); | ||
302 | |||
303 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | ||
304 | bio->bi_bdev = bh->b_bdev; | ||
305 | bio->bi_private = io->io_end = io_end; | ||
306 | bio->bi_end_io = ext4_end_bio; | ||
307 | |||
308 | io_end->inode = inode; | ||
309 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
310 | |||
311 | io->io_bio = bio; | ||
312 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? | ||
313 | WRITE_SYNC_PLUG : WRITE); | ||
314 | io->io_next_block = bh->b_blocknr; | ||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | static int io_submit_add_bh(struct ext4_io_submit *io, | ||
319 | struct ext4_io_page *io_page, | ||
320 | struct inode *inode, | ||
321 | struct writeback_control *wbc, | ||
322 | struct buffer_head *bh) | ||
323 | { | ||
324 | ext4_io_end_t *io_end; | ||
325 | int ret; | ||
326 | |||
327 | if (buffer_new(bh)) { | ||
328 | clear_buffer_new(bh); | ||
329 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | ||
330 | } | ||
331 | |||
332 | if (!buffer_mapped(bh) || buffer_delay(bh)) { | ||
333 | if (!buffer_mapped(bh)) | ||
334 | clear_buffer_dirty(bh); | ||
335 | if (io->io_bio) | ||
336 | ext4_io_submit(io); | ||
337 | return 0; | ||
338 | } | ||
339 | |||
340 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | ||
341 | submit_and_retry: | ||
342 | ext4_io_submit(io); | ||
343 | } | ||
344 | if (io->io_bio == NULL) { | ||
345 | ret = io_submit_init(io, inode, wbc, bh); | ||
346 | if (ret) | ||
347 | return ret; | ||
348 | } | ||
349 | io_end = io->io_end; | ||
350 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | ||
351 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | ||
352 | goto submit_and_retry; | ||
353 | if (buffer_uninit(bh)) | ||
354 | io->io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
355 | io->io_end->size += bh->b_size; | ||
356 | io->io_next_block++; | ||
357 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
358 | if (ret != bh->b_size) | ||
359 | goto submit_and_retry; | ||
360 | if ((io_end->num_io_pages == 0) || | ||
361 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { | ||
362 | io_end->pages[io_end->num_io_pages++] = io_page; | ||
363 | io_page->p_count++; | ||
364 | } | ||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | int ext4_bio_write_page(struct ext4_io_submit *io, | ||
369 | struct page *page, | ||
370 | int len, | ||
371 | struct writeback_control *wbc) | ||
372 | { | ||
373 | struct inode *inode = page->mapping->host; | ||
374 | unsigned block_start, block_end, blocksize; | ||
375 | struct ext4_io_page *io_page; | ||
376 | struct buffer_head *bh, *head; | ||
377 | int ret = 0; | ||
378 | |||
379 | blocksize = 1 << inode->i_blkbits; | ||
380 | |||
381 | BUG_ON(PageWriteback(page)); | ||
382 | set_page_writeback(page); | ||
383 | ClearPageError(page); | ||
384 | |||
385 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | ||
386 | if (!io_page) { | ||
387 | set_page_dirty(page); | ||
388 | unlock_page(page); | ||
389 | return -ENOMEM; | ||
390 | } | ||
391 | io_page->p_page = page; | ||
392 | io_page->p_count = 0; | ||
393 | get_page(page); | ||
394 | |||
395 | for (bh = head = page_buffers(page), block_start = 0; | ||
396 | bh != head || !block_start; | ||
397 | block_start = block_end, bh = bh->b_this_page) { | ||
398 | block_end = block_start + blocksize; | ||
399 | if (block_start >= len) { | ||
400 | clear_buffer_dirty(bh); | ||
401 | set_buffer_uptodate(bh); | ||
402 | continue; | ||
403 | } | ||
404 | ret = io_submit_add_bh(io, io_page, inode, wbc, bh); | ||
405 | if (ret) { | ||
406 | /* | ||
407 | * We only get here on ENOMEM. Not much else | ||
408 | * we can do but mark the page as dirty, and | ||
409 | * better luck next time. | ||
410 | */ | ||
411 | set_page_dirty(page); | ||
412 | break; | ||
413 | } | ||
414 | } | ||
415 | unlock_page(page); | ||
416 | /* | ||
417 | * If the page was truncated before we could do the writeback, | ||
418 | * or we had a memory allocation error while trying to write | ||
419 | * the first buffer head, we won't have submitted any pages for | ||
420 | * I/O. In that case we need to make sure we've cleared the | ||
421 | * PageWriteback bit from the page to prevent the system from | ||
422 | * wedging later on. | ||
423 | */ | ||
424 | if (io_page->p_count == 0) { | ||
425 | put_page(page); | ||
426 | end_page_writeback(page); | ||
427 | kmem_cache_free(io_page_cachep, io_page); | ||
428 | } | ||
429 | return ret; | ||
430 | } | ||
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ca5c8aa00a2f..dc963929de65 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
226 | } | 226 | } |
227 | 227 | ||
228 | /* Zero out all of the reserved backup group descriptor table blocks */ | 228 | /* Zero out all of the reserved backup group descriptor table blocks */ |
229 | for (i = 0, bit = gdblocks + 1, block = start + bit; | 229 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", |
230 | i < reserved_gdb; i++, block++, bit++) { | 230 | block, sbi->s_itb_per_group); |
231 | struct buffer_head *gdb; | 231 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
232 | 232 | GFP_NOFS); | |
233 | ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); | 233 | if (err) |
234 | 234 | goto exit_bh; | |
235 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | ||
236 | goto exit_bh; | ||
237 | 235 | ||
238 | if (IS_ERR(gdb = bclean(handle, sb, block))) { | ||
239 | err = PTR_ERR(gdb); | ||
240 | goto exit_bh; | ||
241 | } | ||
242 | ext4_handle_dirty_metadata(handle, NULL, gdb); | ||
243 | ext4_set_bit(bit, bh->b_data); | ||
244 | brelse(gdb); | ||
245 | } | ||
246 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | 236 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, |
247 | input->block_bitmap - start); | 237 | input->block_bitmap - start); |
248 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | 238 | ext4_set_bit(input->block_bitmap - start, bh->b_data); |
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
251 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | 241 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); |
252 | 242 | ||
253 | /* Zero out all of the inode table blocks */ | 243 | /* Zero out all of the inode table blocks */ |
254 | for (i = 0, block = input->inode_table, bit = block - start; | 244 | block = input->inode_table; |
255 | i < sbi->s_itb_per_group; i++, bit++, block++) { | 245 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", |
256 | struct buffer_head *it; | 246 | block, sbi->s_itb_per_group); |
257 | 247 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | |
258 | ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); | 248 | if (err) |
259 | 249 | goto exit_bh; | |
260 | if ((err = extend_or_restart_transaction(handle, 1, bh))) | ||
261 | goto exit_bh; | ||
262 | |||
263 | if (IS_ERR(it = bclean(handle, sb, block))) { | ||
264 | err = PTR_ERR(it); | ||
265 | goto exit_bh; | ||
266 | } | ||
267 | ext4_handle_dirty_metadata(handle, NULL, it); | ||
268 | brelse(it); | ||
269 | ext4_set_bit(bit, bh->b_data); | ||
270 | } | ||
271 | 250 | ||
272 | if ((err = extend_or_restart_transaction(handle, 2, bh))) | 251 | if ((err = extend_or_restart_transaction(handle, 2, bh))) |
273 | goto exit_bh; | 252 | goto exit_bh; |
274 | 253 | ||
275 | mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); | 254 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, |
255 | bh->b_data); | ||
276 | ext4_handle_dirty_metadata(handle, NULL, bh); | 256 | ext4_handle_dirty_metadata(handle, NULL, bh); |
277 | brelse(bh); | 257 | brelse(bh); |
278 | /* Mark unused entries in inode bitmap used */ | 258 | /* Mark unused entries in inode bitmap used */ |
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
283 | goto exit_journal; | 263 | goto exit_journal; |
284 | } | 264 | } |
285 | 265 | ||
286 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 266 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
287 | bh->b_data); | 267 | bh->b_data); |
288 | ext4_handle_dirty_metadata(handle, NULL, bh); | 268 | ext4_handle_dirty_metadata(handle, NULL, bh); |
289 | exit_bh: | 269 | exit_bh: |
290 | brelse(bh); | 270 | brelse(bh); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8ecc1e590303..0348ce066592 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -40,6 +40,9 @@ | |||
40 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
41 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
42 | 42 | ||
43 | #include <linux/kthread.h> | ||
44 | #include <linux/freezer.h> | ||
45 | |||
43 | #include "ext4.h" | 46 | #include "ext4.h" |
44 | #include "ext4_jbd2.h" | 47 | #include "ext4_jbd2.h" |
45 | #include "xattr.h" | 48 | #include "xattr.h" |
@@ -49,8 +52,11 @@ | |||
49 | #define CREATE_TRACE_POINTS | 52 | #define CREATE_TRACE_POINTS |
50 | #include <trace/events/ext4.h> | 53 | #include <trace/events/ext4.h> |
51 | 54 | ||
52 | struct proc_dir_entry *ext4_proc_root; | 55 | static struct proc_dir_entry *ext4_proc_root; |
53 | static struct kset *ext4_kset; | 56 | static struct kset *ext4_kset; |
57 | struct ext4_lazy_init *ext4_li_info; | ||
58 | struct mutex ext4_li_mtx; | ||
59 | struct ext4_features *ext4_feat; | ||
54 | 60 | ||
55 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
56 | unsigned long journal_devnum); | 62 | unsigned long journal_devnum); |
@@ -69,6 +75,8 @@ static void ext4_write_super(struct super_block *sb); | |||
69 | static int ext4_freeze(struct super_block *sb); | 75 | static int ext4_freeze(struct super_block *sb); |
70 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, | 76 | static int ext4_get_sb(struct file_system_type *fs_type, int flags, |
71 | const char *dev_name, void *data, struct vfsmount *mnt); | 77 | const char *dev_name, void *data, struct vfsmount *mnt); |
78 | static void ext4_destroy_lazyinit_thread(void); | ||
79 | static void ext4_unregister_li_request(struct super_block *sb); | ||
72 | 80 | ||
73 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 81 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
74 | static struct file_system_type ext3_fs_type = { | 82 | static struct file_system_type ext3_fs_type = { |
@@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb) | |||
701 | struct ext4_super_block *es = sbi->s_es; | 709 | struct ext4_super_block *es = sbi->s_es; |
702 | int i, err; | 710 | int i, err; |
703 | 711 | ||
712 | ext4_unregister_li_request(sb); | ||
704 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 713 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
705 | 714 | ||
706 | flush_workqueue(sbi->dio_unwritten_wq); | 715 | flush_workqueue(sbi->dio_unwritten_wq); |
@@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb) | |||
717 | ext4_abort(sb, "Couldn't clean up the journal"); | 726 | ext4_abort(sb, "Couldn't clean up the journal"); |
718 | } | 727 | } |
719 | 728 | ||
729 | del_timer(&sbi->s_err_report); | ||
720 | ext4_release_system_zone(sb); | 730 | ext4_release_system_zone(sb); |
721 | ext4_mb_release(sb); | 731 | ext4_mb_release(sb); |
722 | ext4_ext_release(sb); | 732 | ext4_ext_release(sb); |
@@ -1042,6 +1052,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1042 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) | 1052 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) |
1043 | seq_puts(seq, ",block_validity"); | 1053 | seq_puts(seq, ",block_validity"); |
1044 | 1054 | ||
1055 | if (!test_opt(sb, INIT_INODE_TABLE)) | ||
1056 | seq_puts(seq, ",noinit_inode_table"); | ||
1057 | else if (sbi->s_li_wait_mult) | ||
1058 | seq_printf(seq, ",init_inode_table=%u", | ||
1059 | (unsigned) sbi->s_li_wait_mult); | ||
1060 | |||
1045 | ext4_show_quota_options(seq, sb); | 1061 | ext4_show_quota_options(seq, sb); |
1046 | 1062 | ||
1047 | return 0; | 1063 | return 0; |
@@ -1170,6 +1186,7 @@ static const struct super_operations ext4_sops = { | |||
1170 | .quota_write = ext4_quota_write, | 1186 | .quota_write = ext4_quota_write, |
1171 | #endif | 1187 | #endif |
1172 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1188 | .bdev_try_to_free_page = bdev_try_to_free_page, |
1189 | .trim_fs = ext4_trim_fs | ||
1173 | }; | 1190 | }; |
1174 | 1191 | ||
1175 | static const struct super_operations ext4_nojournal_sops = { | 1192 | static const struct super_operations ext4_nojournal_sops = { |
@@ -1216,6 +1233,7 @@ enum { | |||
1216 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1233 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1217 | Opt_dioread_nolock, Opt_dioread_lock, | 1234 | Opt_dioread_nolock, Opt_dioread_lock, |
1218 | Opt_discard, Opt_nodiscard, | 1235 | Opt_discard, Opt_nodiscard, |
1236 | Opt_init_inode_table, Opt_noinit_inode_table, | ||
1219 | }; | 1237 | }; |
1220 | 1238 | ||
1221 | static const match_table_t tokens = { | 1239 | static const match_table_t tokens = { |
@@ -1286,6 +1304,9 @@ static const match_table_t tokens = { | |||
1286 | {Opt_dioread_lock, "dioread_lock"}, | 1304 | {Opt_dioread_lock, "dioread_lock"}, |
1287 | {Opt_discard, "discard"}, | 1305 | {Opt_discard, "discard"}, |
1288 | {Opt_nodiscard, "nodiscard"}, | 1306 | {Opt_nodiscard, "nodiscard"}, |
1307 | {Opt_init_inode_table, "init_itable=%u"}, | ||
1308 | {Opt_init_inode_table, "init_itable"}, | ||
1309 | {Opt_noinit_inode_table, "noinit_itable"}, | ||
1289 | {Opt_err, NULL}, | 1310 | {Opt_err, NULL}, |
1290 | }; | 1311 | }; |
1291 | 1312 | ||
@@ -1756,6 +1777,20 @@ set_qf_format: | |||
1756 | case Opt_dioread_lock: | 1777 | case Opt_dioread_lock: |
1757 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); | 1778 | clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); |
1758 | break; | 1779 | break; |
1780 | case Opt_init_inode_table: | ||
1781 | set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
1782 | if (args[0].from) { | ||
1783 | if (match_int(&args[0], &option)) | ||
1784 | return 0; | ||
1785 | } else | ||
1786 | option = EXT4_DEF_LI_WAIT_MULT; | ||
1787 | if (option < 0) | ||
1788 | return 0; | ||
1789 | sbi->s_li_wait_mult = option; | ||
1790 | break; | ||
1791 | case Opt_noinit_inode_table: | ||
1792 | clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
1793 | break; | ||
1759 | default: | 1794 | default: |
1760 | ext4_msg(sb, KERN_ERR, | 1795 | ext4_msg(sb, KERN_ERR, |
1761 | "Unrecognized mount option \"%s\" " | 1796 | "Unrecognized mount option \"%s\" " |
@@ -1939,7 +1974,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, | |||
1939 | } | 1974 | } |
1940 | 1975 | ||
1941 | /* Called at mount-time, super-block is locked */ | 1976 | /* Called at mount-time, super-block is locked */ |
1942 | static int ext4_check_descriptors(struct super_block *sb) | 1977 | static int ext4_check_descriptors(struct super_block *sb, |
1978 | ext4_group_t *first_not_zeroed) | ||
1943 | { | 1979 | { |
1944 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1980 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1945 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); | 1981 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); |
@@ -1948,7 +1984,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1948 | ext4_fsblk_t inode_bitmap; | 1984 | ext4_fsblk_t inode_bitmap; |
1949 | ext4_fsblk_t inode_table; | 1985 | ext4_fsblk_t inode_table; |
1950 | int flexbg_flag = 0; | 1986 | int flexbg_flag = 0; |
1951 | ext4_group_t i; | 1987 | ext4_group_t i, grp = sbi->s_groups_count; |
1952 | 1988 | ||
1953 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1989 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
1954 | flexbg_flag = 1; | 1990 | flexbg_flag = 1; |
@@ -1964,6 +2000,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1964 | last_block = first_block + | 2000 | last_block = first_block + |
1965 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2001 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); |
1966 | 2002 | ||
2003 | if ((grp == sbi->s_groups_count) && | ||
2004 | !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2005 | grp = i; | ||
2006 | |||
1967 | block_bitmap = ext4_block_bitmap(sb, gdp); | 2007 | block_bitmap = ext4_block_bitmap(sb, gdp); |
1968 | if (block_bitmap < first_block || block_bitmap > last_block) { | 2008 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1969 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2009 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
@@ -2001,6 +2041,8 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
2001 | if (!flexbg_flag) | 2041 | if (!flexbg_flag) |
2002 | first_block += EXT4_BLOCKS_PER_GROUP(sb); | 2042 | first_block += EXT4_BLOCKS_PER_GROUP(sb); |
2003 | } | 2043 | } |
2044 | if (NULL != first_not_zeroed) | ||
2045 | *first_not_zeroed = grp; | ||
2004 | 2046 | ||
2005 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); | 2047 | ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); |
2006 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); | 2048 | sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); |
@@ -2373,6 +2415,7 @@ static struct ext4_attr ext4_attr_##_name = { \ | |||
2373 | #define EXT4_ATTR(name, mode, show, store) \ | 2415 | #define EXT4_ATTR(name, mode, show, store) \ |
2374 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2416 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
2375 | 2417 | ||
2418 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) | ||
2376 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) | 2419 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) |
2377 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) | 2420 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) |
2378 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2421 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
@@ -2409,6 +2452,16 @@ static struct attribute *ext4_attrs[] = { | |||
2409 | NULL, | 2452 | NULL, |
2410 | }; | 2453 | }; |
2411 | 2454 | ||
2455 | /* Features this copy of ext4 supports */ | ||
2456 | EXT4_INFO_ATTR(lazy_itable_init); | ||
2457 | EXT4_INFO_ATTR(batched_discard); | ||
2458 | |||
2459 | static struct attribute *ext4_feat_attrs[] = { | ||
2460 | ATTR_LIST(lazy_itable_init), | ||
2461 | ATTR_LIST(batched_discard), | ||
2462 | NULL, | ||
2463 | }; | ||
2464 | |||
2412 | static ssize_t ext4_attr_show(struct kobject *kobj, | 2465 | static ssize_t ext4_attr_show(struct kobject *kobj, |
2413 | struct attribute *attr, char *buf) | 2466 | struct attribute *attr, char *buf) |
2414 | { | 2467 | { |
@@ -2437,7 +2490,6 @@ static void ext4_sb_release(struct kobject *kobj) | |||
2437 | complete(&sbi->s_kobj_unregister); | 2490 | complete(&sbi->s_kobj_unregister); |
2438 | } | 2491 | } |
2439 | 2492 | ||
2440 | |||
2441 | static const struct sysfs_ops ext4_attr_ops = { | 2493 | static const struct sysfs_ops ext4_attr_ops = { |
2442 | .show = ext4_attr_show, | 2494 | .show = ext4_attr_show, |
2443 | .store = ext4_attr_store, | 2495 | .store = ext4_attr_store, |
@@ -2449,6 +2501,17 @@ static struct kobj_type ext4_ktype = { | |||
2449 | .release = ext4_sb_release, | 2501 | .release = ext4_sb_release, |
2450 | }; | 2502 | }; |
2451 | 2503 | ||
2504 | static void ext4_feat_release(struct kobject *kobj) | ||
2505 | { | ||
2506 | complete(&ext4_feat->f_kobj_unregister); | ||
2507 | } | ||
2508 | |||
2509 | static struct kobj_type ext4_feat_ktype = { | ||
2510 | .default_attrs = ext4_feat_attrs, | ||
2511 | .sysfs_ops = &ext4_attr_ops, | ||
2512 | .release = ext4_feat_release, | ||
2513 | }; | ||
2514 | |||
2452 | /* | 2515 | /* |
2453 | * Check whether this filesystem can be mounted based on | 2516 | * Check whether this filesystem can be mounted based on |
2454 | * the features present and the RDONLY/RDWR mount requested. | 2517 | * the features present and the RDONLY/RDWR mount requested. |
@@ -2539,6 +2602,372 @@ static void print_daily_error_info(unsigned long arg) | |||
2539 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ | 2602 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ |
2540 | } | 2603 | } |
2541 | 2604 | ||
/*
 * Timer callback of the lazy-init timer.  @data is the task_struct
 * pointer that was stored in li_timer.data; wake that task up.
 */
static void ext4_lazyinode_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2610 | |||
2611 | /* Find next suitable group and run ext4_init_inode_table */ | ||
2612 | static int ext4_run_li_request(struct ext4_li_request *elr) | ||
2613 | { | ||
2614 | struct ext4_group_desc *gdp = NULL; | ||
2615 | ext4_group_t group, ngroups; | ||
2616 | struct super_block *sb; | ||
2617 | unsigned long timeout = 0; | ||
2618 | int ret = 0; | ||
2619 | |||
2620 | sb = elr->lr_super; | ||
2621 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
2622 | |||
2623 | for (group = elr->lr_next_group; group < ngroups; group++) { | ||
2624 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
2625 | if (!gdp) { | ||
2626 | ret = 1; | ||
2627 | break; | ||
2628 | } | ||
2629 | |||
2630 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2631 | break; | ||
2632 | } | ||
2633 | |||
2634 | if (group == ngroups) | ||
2635 | ret = 1; | ||
2636 | |||
2637 | if (!ret) { | ||
2638 | timeout = jiffies; | ||
2639 | ret = ext4_init_inode_table(sb, group, | ||
2640 | elr->lr_timeout ? 0 : 1); | ||
2641 | if (elr->lr_timeout == 0) { | ||
2642 | timeout = jiffies - timeout; | ||
2643 | if (elr->lr_sbi->s_li_wait_mult) | ||
2644 | timeout *= elr->lr_sbi->s_li_wait_mult; | ||
2645 | else | ||
2646 | timeout *= 20; | ||
2647 | elr->lr_timeout = timeout; | ||
2648 | } | ||
2649 | elr->lr_next_sched = jiffies + elr->lr_timeout; | ||
2650 | elr->lr_next_group = group + 1; | ||
2651 | } | ||
2652 | |||
2653 | return ret; | ||
2654 | } | ||
2655 | |||
2656 | /* | ||
2657 | * Remove lr_request from the list_request and free the | ||
2658 | * request tructure. Should be called with li_list_mtx held | ||
2659 | */ | ||
2660 | static void ext4_remove_li_request(struct ext4_li_request *elr) | ||
2661 | { | ||
2662 | struct ext4_sb_info *sbi; | ||
2663 | |||
2664 | if (!elr) | ||
2665 | return; | ||
2666 | |||
2667 | sbi = elr->lr_sbi; | ||
2668 | |||
2669 | list_del(&elr->lr_request); | ||
2670 | sbi->s_li_request = NULL; | ||
2671 | kfree(elr); | ||
2672 | } | ||
2673 | |||
2674 | static void ext4_unregister_li_request(struct super_block *sb) | ||
2675 | { | ||
2676 | struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; | ||
2677 | |||
2678 | if (!ext4_li_info) | ||
2679 | return; | ||
2680 | |||
2681 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2682 | ext4_remove_li_request(elr); | ||
2683 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2684 | } | ||
2685 | |||
2686 | /* | ||
2687 | * This is the function where ext4lazyinit thread lives. It walks | ||
2688 | * through the request list searching for next scheduled filesystem. | ||
2689 | * When such a fs is found, run the lazy initialization request | ||
2690 | * (ext4_rn_li_request) and keep track of the time spend in this | ||
2691 | * function. Based on that time we compute next schedule time of | ||
2692 | * the request. When walking through the list is complete, compute | ||
2693 | * next waking time and put itself into sleep. | ||
2694 | */ | ||
2695 | static int ext4_lazyinit_thread(void *arg) | ||
2696 | { | ||
2697 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; | ||
2698 | struct list_head *pos, *n; | ||
2699 | struct ext4_li_request *elr; | ||
2700 | unsigned long next_wakeup; | ||
2701 | DEFINE_WAIT(wait); | ||
2702 | int ret; | ||
2703 | |||
2704 | BUG_ON(NULL == eli); | ||
2705 | |||
2706 | eli->li_timer.data = (unsigned long)current; | ||
2707 | eli->li_timer.function = ext4_lazyinode_timeout; | ||
2708 | |||
2709 | eli->li_task = current; | ||
2710 | wake_up(&eli->li_wait_task); | ||
2711 | |||
2712 | cont_thread: | ||
2713 | while (true) { | ||
2714 | next_wakeup = MAX_JIFFY_OFFSET; | ||
2715 | |||
2716 | mutex_lock(&eli->li_list_mtx); | ||
2717 | if (list_empty(&eli->li_request_list)) { | ||
2718 | mutex_unlock(&eli->li_list_mtx); | ||
2719 | goto exit_thread; | ||
2720 | } | ||
2721 | |||
2722 | list_for_each_safe(pos, n, &eli->li_request_list) { | ||
2723 | elr = list_entry(pos, struct ext4_li_request, | ||
2724 | lr_request); | ||
2725 | |||
2726 | if (time_after_eq(jiffies, elr->lr_next_sched)) | ||
2727 | ret = ext4_run_li_request(elr); | ||
2728 | |||
2729 | if (ret) { | ||
2730 | ret = 0; | ||
2731 | ext4_remove_li_request(elr); | ||
2732 | continue; | ||
2733 | } | ||
2734 | |||
2735 | if (time_before(elr->lr_next_sched, next_wakeup)) | ||
2736 | next_wakeup = elr->lr_next_sched; | ||
2737 | } | ||
2738 | mutex_unlock(&eli->li_list_mtx); | ||
2739 | |||
2740 | if (freezing(current)) | ||
2741 | refrigerator(); | ||
2742 | |||
2743 | if (time_after_eq(jiffies, next_wakeup)) { | ||
2744 | cond_resched(); | ||
2745 | continue; | ||
2746 | } | ||
2747 | |||
2748 | eli->li_timer.expires = next_wakeup; | ||
2749 | add_timer(&eli->li_timer); | ||
2750 | prepare_to_wait(&eli->li_wait_daemon, &wait, | ||
2751 | TASK_INTERRUPTIBLE); | ||
2752 | if (time_before(jiffies, next_wakeup)) | ||
2753 | schedule(); | ||
2754 | finish_wait(&eli->li_wait_daemon, &wait); | ||
2755 | } | ||
2756 | |||
2757 | exit_thread: | ||
2758 | /* | ||
2759 | * It looks like the request list is empty, but we need | ||
2760 | * to check it under the li_list_mtx lock, to prevent any | ||
2761 | * additions into it, and of course we should lock ext4_li_mtx | ||
2762 | * to atomically free the list and ext4_li_info, because at | ||
2763 | * this point another ext4 filesystem could be registering | ||
2764 | * new one. | ||
2765 | */ | ||
2766 | mutex_lock(&ext4_li_mtx); | ||
2767 | mutex_lock(&eli->li_list_mtx); | ||
2768 | if (!list_empty(&eli->li_request_list)) { | ||
2769 | mutex_unlock(&eli->li_list_mtx); | ||
2770 | mutex_unlock(&ext4_li_mtx); | ||
2771 | goto cont_thread; | ||
2772 | } | ||
2773 | mutex_unlock(&eli->li_list_mtx); | ||
2774 | del_timer_sync(&ext4_li_info->li_timer); | ||
2775 | eli->li_task = NULL; | ||
2776 | wake_up(&eli->li_wait_task); | ||
2777 | |||
2778 | kfree(ext4_li_info); | ||
2779 | ext4_li_info = NULL; | ||
2780 | mutex_unlock(&ext4_li_mtx); | ||
2781 | |||
2782 | return 0; | ||
2783 | } | ||
2784 | |||
2785 | static void ext4_clear_request_list(void) | ||
2786 | { | ||
2787 | struct list_head *pos, *n; | ||
2788 | struct ext4_li_request *elr; | ||
2789 | |||
2790 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2791 | if (list_empty(&ext4_li_info->li_request_list)) | ||
2792 | return; | ||
2793 | |||
2794 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { | ||
2795 | elr = list_entry(pos, struct ext4_li_request, | ||
2796 | lr_request); | ||
2797 | ext4_remove_li_request(elr); | ||
2798 | } | ||
2799 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2800 | } | ||
2801 | |||
2802 | static int ext4_run_lazyinit_thread(void) | ||
2803 | { | ||
2804 | struct task_struct *t; | ||
2805 | |||
2806 | t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); | ||
2807 | if (IS_ERR(t)) { | ||
2808 | int err = PTR_ERR(t); | ||
2809 | ext4_clear_request_list(); | ||
2810 | del_timer_sync(&ext4_li_info->li_timer); | ||
2811 | kfree(ext4_li_info); | ||
2812 | ext4_li_info = NULL; | ||
2813 | printk(KERN_CRIT "EXT4: error %d creating inode table " | ||
2814 | "initialization thread\n", | ||
2815 | err); | ||
2816 | return err; | ||
2817 | } | ||
2818 | ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; | ||
2819 | |||
2820 | wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL); | ||
2821 | return 0; | ||
2822 | } | ||
2823 | |||
2824 | /* | ||
2825 | * Check whether it make sense to run itable init. thread or not. | ||
2826 | * If there is at least one uninitialized inode table, return | ||
2827 | * corresponding group number, else the loop goes through all | ||
2828 | * groups and return total number of groups. | ||
2829 | */ | ||
2830 | static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) | ||
2831 | { | ||
2832 | ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; | ||
2833 | struct ext4_group_desc *gdp = NULL; | ||
2834 | |||
2835 | for (group = 0; group < ngroups; group++) { | ||
2836 | gdp = ext4_get_group_desc(sb, group, NULL); | ||
2837 | if (!gdp) | ||
2838 | continue; | ||
2839 | |||
2840 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | ||
2841 | break; | ||
2842 | } | ||
2843 | |||
2844 | return group; | ||
2845 | } | ||
2846 | |||
2847 | static int ext4_li_info_new(void) | ||
2848 | { | ||
2849 | struct ext4_lazy_init *eli = NULL; | ||
2850 | |||
2851 | eli = kzalloc(sizeof(*eli), GFP_KERNEL); | ||
2852 | if (!eli) | ||
2853 | return -ENOMEM; | ||
2854 | |||
2855 | eli->li_task = NULL; | ||
2856 | INIT_LIST_HEAD(&eli->li_request_list); | ||
2857 | mutex_init(&eli->li_list_mtx); | ||
2858 | |||
2859 | init_waitqueue_head(&eli->li_wait_daemon); | ||
2860 | init_waitqueue_head(&eli->li_wait_task); | ||
2861 | init_timer(&eli->li_timer); | ||
2862 | eli->li_state |= EXT4_LAZYINIT_QUIT; | ||
2863 | |||
2864 | ext4_li_info = eli; | ||
2865 | |||
2866 | return 0; | ||
2867 | } | ||
2868 | |||
2869 | static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, | ||
2870 | ext4_group_t start) | ||
2871 | { | ||
2872 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2873 | struct ext4_li_request *elr; | ||
2874 | unsigned long rnd; | ||
2875 | |||
2876 | elr = kzalloc(sizeof(*elr), GFP_KERNEL); | ||
2877 | if (!elr) | ||
2878 | return NULL; | ||
2879 | |||
2880 | elr->lr_super = sb; | ||
2881 | elr->lr_sbi = sbi; | ||
2882 | elr->lr_next_group = start; | ||
2883 | |||
2884 | /* | ||
2885 | * Randomize first schedule time of the request to | ||
2886 | * spread the inode table initialization requests | ||
2887 | * better. | ||
2888 | */ | ||
2889 | get_random_bytes(&rnd, sizeof(rnd)); | ||
2890 | elr->lr_next_sched = jiffies + (unsigned long)rnd % | ||
2891 | (EXT4_DEF_LI_MAX_START_DELAY * HZ); | ||
2892 | |||
2893 | return elr; | ||
2894 | } | ||
2895 | |||
2896 | static int ext4_register_li_request(struct super_block *sb, | ||
2897 | ext4_group_t first_not_zeroed) | ||
2898 | { | ||
2899 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2900 | struct ext4_li_request *elr; | ||
2901 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
2902 | int ret; | ||
2903 | |||
2904 | if (sbi->s_li_request != NULL) | ||
2905 | return 0; | ||
2906 | |||
2907 | if (first_not_zeroed == ngroups || | ||
2908 | (sb->s_flags & MS_RDONLY) || | ||
2909 | !test_opt(sb, INIT_INODE_TABLE)) { | ||
2910 | sbi->s_li_request = NULL; | ||
2911 | return 0; | ||
2912 | } | ||
2913 | |||
2914 | if (first_not_zeroed == ngroups) { | ||
2915 | sbi->s_li_request = NULL; | ||
2916 | return 0; | ||
2917 | } | ||
2918 | |||
2919 | elr = ext4_li_request_new(sb, first_not_zeroed); | ||
2920 | if (!elr) | ||
2921 | return -ENOMEM; | ||
2922 | |||
2923 | mutex_lock(&ext4_li_mtx); | ||
2924 | |||
2925 | if (NULL == ext4_li_info) { | ||
2926 | ret = ext4_li_info_new(); | ||
2927 | if (ret) | ||
2928 | goto out; | ||
2929 | } | ||
2930 | |||
2931 | mutex_lock(&ext4_li_info->li_list_mtx); | ||
2932 | list_add(&elr->lr_request, &ext4_li_info->li_request_list); | ||
2933 | mutex_unlock(&ext4_li_info->li_list_mtx); | ||
2934 | |||
2935 | sbi->s_li_request = elr; | ||
2936 | |||
2937 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { | ||
2938 | ret = ext4_run_lazyinit_thread(); | ||
2939 | if (ret) | ||
2940 | goto out; | ||
2941 | } | ||
2942 | out: | ||
2943 | mutex_unlock(&ext4_li_mtx); | ||
2944 | if (ret) | ||
2945 | kfree(elr); | ||
2946 | return ret; | ||
2947 | } | ||
2948 | |||
2949 | /* | ||
2950 | * We do not need to lock anything since this is called on | ||
2951 | * module unload. | ||
2952 | */ | ||
2953 | static void ext4_destroy_lazyinit_thread(void) | ||
2954 | { | ||
2955 | /* | ||
2956 | * If thread exited earlier | ||
2957 | * there's nothing to be done. | ||
2958 | */ | ||
2959 | if (!ext4_li_info) | ||
2960 | return; | ||
2961 | |||
2962 | ext4_clear_request_list(); | ||
2963 | |||
2964 | while (ext4_li_info->li_task) { | ||
2965 | wake_up(&ext4_li_info->li_wait_daemon); | ||
2966 | wait_event(ext4_li_info->li_wait_task, | ||
2967 | ext4_li_info->li_task == NULL); | ||
2968 | } | ||
2969 | } | ||
2970 | |||
2542 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2971 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2543 | __releases(kernel_lock) | 2972 | __releases(kernel_lock) |
2544 | __acquires(kernel_lock) | 2973 | __acquires(kernel_lock) |
@@ -2564,6 +2993,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2564 | __u64 blocks_count; | 2993 | __u64 blocks_count; |
2565 | int err; | 2994 | int err; |
2566 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 2995 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
2996 | ext4_group_t first_not_zeroed; | ||
2567 | 2997 | ||
2568 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 2998 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
2569 | if (!sbi) | 2999 | if (!sbi) |
@@ -2624,6 +3054,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2624 | 3054 | ||
2625 | /* Set defaults before we parse the mount options */ | 3055 | /* Set defaults before we parse the mount options */ |
2626 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 3056 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
3057 | set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); | ||
2627 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 3058 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
2628 | set_opt(sbi->s_mount_opt, DEBUG); | 3059 | set_opt(sbi->s_mount_opt, DEBUG); |
2629 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { | 3060 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { |
@@ -2901,7 +3332,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2901 | goto failed_mount2; | 3332 | goto failed_mount2; |
2902 | } | 3333 | } |
2903 | } | 3334 | } |
2904 | if (!ext4_check_descriptors(sb)) { | 3335 | if (!ext4_check_descriptors(sb, &first_not_zeroed)) { |
2905 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); | 3336 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
2906 | goto failed_mount2; | 3337 | goto failed_mount2; |
2907 | } | 3338 | } |
@@ -3122,6 +3553,10 @@ no_journal: | |||
3122 | goto failed_mount4; | 3553 | goto failed_mount4; |
3123 | } | 3554 | } |
3124 | 3555 | ||
3556 | err = ext4_register_li_request(sb, first_not_zeroed); | ||
3557 | if (err) | ||
3558 | goto failed_mount4; | ||
3559 | |||
3125 | sbi->s_kobj.kset = ext4_kset; | 3560 | sbi->s_kobj.kset = ext4_kset; |
3126 | init_completion(&sbi->s_kobj_unregister); | 3561 | init_completion(&sbi->s_kobj_unregister); |
3127 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, | 3562 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, |
@@ -3461,7 +3896,7 @@ static int ext4_load_journal(struct super_block *sb, | |||
3461 | EXT4_SB(sb)->s_journal = journal; | 3896 | EXT4_SB(sb)->s_journal = journal; |
3462 | ext4_clear_journal_err(sb, es); | 3897 | ext4_clear_journal_err(sb, es); |
3463 | 3898 | ||
3464 | if (journal_devnum && | 3899 | if (!really_read_only && journal_devnum && |
3465 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 3900 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
3466 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 3901 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
3467 | 3902 | ||
@@ -3514,9 +3949,12 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3514 | else | 3949 | else |
3515 | es->s_kbytes_written = | 3950 | es->s_kbytes_written = |
3516 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | 3951 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); |
3517 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 3952 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) |
3953 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | ||
3518 | &EXT4_SB(sb)->s_freeblocks_counter)); | 3954 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3519 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | 3955 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) |
3956 | es->s_free_inodes_count = | ||
3957 | cpu_to_le32(percpu_counter_sum_positive( | ||
3520 | &EXT4_SB(sb)->s_freeinodes_counter)); | 3958 | &EXT4_SB(sb)->s_freeinodes_counter)); |
3521 | sb->s_dirt = 0; | 3959 | sb->s_dirt = 0; |
3522 | BUFFER_TRACE(sbh, "marking dirty"); | 3960 | BUFFER_TRACE(sbh, "marking dirty"); |
@@ -3835,6 +4273,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3835 | enable_quota = 1; | 4273 | enable_quota = 1; |
3836 | } | 4274 | } |
3837 | } | 4275 | } |
4276 | |||
4277 | /* | ||
4278 | * Reinitialize lazy itable initialization thread based on | ||
4279 | * current settings | ||
4280 | */ | ||
4281 | if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) | ||
4282 | ext4_unregister_li_request(sb); | ||
4283 | else { | ||
4284 | ext4_group_t first_not_zeroed; | ||
4285 | first_not_zeroed = ext4_has_uninit_itable(sb); | ||
4286 | ext4_register_li_request(sb, first_not_zeroed); | ||
4287 | } | ||
4288 | |||
3838 | ext4_setup_system_zone(sb); | 4289 | ext4_setup_system_zone(sb); |
3839 | if (sbi->s_journal == NULL) | 4290 | if (sbi->s_journal == NULL) |
3840 | ext4_commit_super(sb, 1); | 4291 | ext4_commit_super(sb, 1); |
@@ -4276,23 +4727,53 @@ static struct file_system_type ext4_fs_type = { | |||
4276 | .fs_flags = FS_REQUIRES_DEV, | 4727 | .fs_flags = FS_REQUIRES_DEV, |
4277 | }; | 4728 | }; |
4278 | 4729 | ||
4279 | static int __init init_ext4_fs(void) | 4730 | int __init ext4_init_feat_adverts(void) |
4731 | { | ||
4732 | struct ext4_features *ef; | ||
4733 | int ret = -ENOMEM; | ||
4734 | |||
4735 | ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); | ||
4736 | if (!ef) | ||
4737 | goto out; | ||
4738 | |||
4739 | ef->f_kobj.kset = ext4_kset; | ||
4740 | init_completion(&ef->f_kobj_unregister); | ||
4741 | ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, | ||
4742 | "features"); | ||
4743 | if (ret) { | ||
4744 | kfree(ef); | ||
4745 | goto out; | ||
4746 | } | ||
4747 | |||
4748 | ext4_feat = ef; | ||
4749 | ret = 0; | ||
4750 | out: | ||
4751 | return ret; | ||
4752 | } | ||
4753 | |||
4754 | static int __init ext4_init_fs(void) | ||
4280 | { | 4755 | { |
4281 | int err; | 4756 | int err; |
4282 | 4757 | ||
4283 | ext4_check_flag_values(); | 4758 | ext4_check_flag_values(); |
4284 | err = init_ext4_system_zone(); | 4759 | err = ext4_init_pageio(); |
4285 | if (err) | 4760 | if (err) |
4286 | return err; | 4761 | return err; |
4762 | err = ext4_init_system_zone(); | ||
4763 | if (err) | ||
4764 | goto out5; | ||
4287 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 4765 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
4288 | if (!ext4_kset) | 4766 | if (!ext4_kset) |
4289 | goto out4; | 4767 | goto out4; |
4290 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 4768 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
4291 | err = init_ext4_mballoc(); | 4769 | |
4770 | err = ext4_init_feat_adverts(); | ||
4771 | |||
4772 | err = ext4_init_mballoc(); | ||
4292 | if (err) | 4773 | if (err) |
4293 | goto out3; | 4774 | goto out3; |
4294 | 4775 | ||
4295 | err = init_ext4_xattr(); | 4776 | err = ext4_init_xattr(); |
4296 | if (err) | 4777 | if (err) |
4297 | goto out2; | 4778 | goto out2; |
4298 | err = init_inodecache(); | 4779 | err = init_inodecache(); |
@@ -4303,38 +4784,46 @@ static int __init init_ext4_fs(void) | |||
4303 | err = register_filesystem(&ext4_fs_type); | 4784 | err = register_filesystem(&ext4_fs_type); |
4304 | if (err) | 4785 | if (err) |
4305 | goto out; | 4786 | goto out; |
4787 | |||
4788 | ext4_li_info = NULL; | ||
4789 | mutex_init(&ext4_li_mtx); | ||
4306 | return 0; | 4790 | return 0; |
4307 | out: | 4791 | out: |
4308 | unregister_as_ext2(); | 4792 | unregister_as_ext2(); |
4309 | unregister_as_ext3(); | 4793 | unregister_as_ext3(); |
4310 | destroy_inodecache(); | 4794 | destroy_inodecache(); |
4311 | out1: | 4795 | out1: |
4312 | exit_ext4_xattr(); | 4796 | ext4_exit_xattr(); |
4313 | out2: | 4797 | out2: |
4314 | exit_ext4_mballoc(); | 4798 | ext4_exit_mballoc(); |
4315 | out3: | 4799 | out3: |
4800 | kfree(ext4_feat); | ||
4316 | remove_proc_entry("fs/ext4", NULL); | 4801 | remove_proc_entry("fs/ext4", NULL); |
4317 | kset_unregister(ext4_kset); | 4802 | kset_unregister(ext4_kset); |
4318 | out4: | 4803 | out4: |
4319 | exit_ext4_system_zone(); | 4804 | ext4_exit_system_zone(); |
4805 | out5: | ||
4806 | ext4_exit_pageio(); | ||
4320 | return err; | 4807 | return err; |
4321 | } | 4808 | } |
4322 | 4809 | ||
4323 | static void __exit exit_ext4_fs(void) | 4810 | static void __exit ext4_exit_fs(void) |
4324 | { | 4811 | { |
4812 | ext4_destroy_lazyinit_thread(); | ||
4325 | unregister_as_ext2(); | 4813 | unregister_as_ext2(); |
4326 | unregister_as_ext3(); | 4814 | unregister_as_ext3(); |
4327 | unregister_filesystem(&ext4_fs_type); | 4815 | unregister_filesystem(&ext4_fs_type); |
4328 | destroy_inodecache(); | 4816 | destroy_inodecache(); |
4329 | exit_ext4_xattr(); | 4817 | ext4_exit_xattr(); |
4330 | exit_ext4_mballoc(); | 4818 | ext4_exit_mballoc(); |
4331 | remove_proc_entry("fs/ext4", NULL); | 4819 | remove_proc_entry("fs/ext4", NULL); |
4332 | kset_unregister(ext4_kset); | 4820 | kset_unregister(ext4_kset); |
4333 | exit_ext4_system_zone(); | 4821 | ext4_exit_system_zone(); |
4822 | ext4_exit_pageio(); | ||
4334 | } | 4823 | } |
4335 | 4824 | ||
4336 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 4825 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
4337 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); | 4826 | MODULE_DESCRIPTION("Fourth Extended Filesystem"); |
4338 | MODULE_LICENSE("GPL"); | 4827 | MODULE_LICENSE("GPL"); |
4339 | module_init(init_ext4_fs) | 4828 | module_init(ext4_init_fs) |
4340 | module_exit(exit_ext4_fs) | 4829 | module_exit(ext4_exit_fs) |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3a8cd8dff1ad..fa4b899da4b3 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, | |||
1588 | #undef BLOCK_HASH_SHIFT | 1588 | #undef BLOCK_HASH_SHIFT |
1589 | 1589 | ||
1590 | int __init | 1590 | int __init |
1591 | init_ext4_xattr(void) | 1591 | ext4_init_xattr(void) |
1592 | { | 1592 | { |
1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); | 1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); |
1594 | if (!ext4_xattr_cache) | 1594 | if (!ext4_xattr_cache) |
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void) | |||
1597 | } | 1597 | } |
1598 | 1598 | ||
1599 | void | 1599 | void |
1600 | exit_ext4_xattr(void) | 1600 | ext4_exit_xattr(void) |
1601 | { | 1601 | { |
1602 | if (ext4_xattr_cache) | 1602 | if (ext4_xattr_cache) |
1603 | mb_cache_destroy(ext4_xattr_cache); | 1603 | mb_cache_destroy(ext4_xattr_cache); |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 518e96e43905..281dd8353652 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *); | |||
83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | 83 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, |
84 | struct ext4_inode *raw_inode, handle_t *handle); | 84 | struct ext4_inode *raw_inode, handle_t *handle); |
85 | 85 | ||
86 | extern int init_ext4_xattr(void); | 86 | extern int __init ext4_init_xattr(void); |
87 | extern void exit_ext4_xattr(void); | 87 | extern void ext4_exit_xattr(void); |
88 | 88 | ||
89 | extern const struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb) | |||
121 | { | 121 | { |
122 | } | 122 | } |
123 | 123 | ||
124 | static inline int | 124 | static __init inline int |
125 | init_ext4_xattr(void) | 125 | init_ext4_xattr(void) |
126 | { | 126 | { |
127 | return 0; | 127 | return 0; |
128 | } | 128 | } |
129 | 129 | ||
130 | static inline void | 130 | static inline void |
131 | exit_ext4_xattr(void) | 131 | ext4_exit_xattr(void) |
132 | { | 132 | { |
133 | } | 133 | } |
134 | 134 | ||
diff --git a/fs/ioctl.c b/fs/ioctl.c index f855ea4fc888..e92fdbb3bc3a 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp) | |||
530 | return thaw_super(sb); | 530 | return thaw_super(sb); |
531 | } | 531 | } |
532 | 532 | ||
533 | static int ioctl_fstrim(struct file *filp, void __user *argp) | ||
534 | { | ||
535 | struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; | ||
536 | struct fstrim_range range; | ||
537 | int ret = 0; | ||
538 | |||
539 | if (!capable(CAP_SYS_ADMIN)) | ||
540 | return -EPERM; | ||
541 | |||
542 | /* If filesystem doesn't support trim feature, return. */ | ||
543 | if (sb->s_op->trim_fs == NULL) | ||
544 | return -EOPNOTSUPP; | ||
545 | |||
546 | /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */ | ||
547 | if (sb->s_bdev == NULL) | ||
548 | return -EINVAL; | ||
549 | |||
550 | if (argp == NULL) { | ||
551 | range.start = 0; | ||
552 | range.len = ULLONG_MAX; | ||
553 | range.minlen = 0; | ||
554 | } else if (copy_from_user(&range, argp, sizeof(range))) | ||
555 | return -EFAULT; | ||
556 | |||
557 | ret = sb->s_op->trim_fs(sb, &range); | ||
558 | if (ret < 0) | ||
559 | return ret; | ||
560 | |||
561 | if ((argp != NULL) && | ||
562 | (copy_to_user(argp, &range, sizeof(range)))) | ||
563 | return -EFAULT; | ||
564 | |||
565 | return 0; | ||
566 | } | ||
567 | |||
533 | /* | 568 | /* |
534 | * When you add any new common ioctls to the switches above and below | 569 | * When you add any new common ioctls to the switches above and below |
535 | * please update compat_sys_ioctl() too. | 570 | * please update compat_sys_ioctl() too. |
@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | |||
580 | error = ioctl_fsthaw(filp); | 615 | error = ioctl_fsthaw(filp); |
581 | break; | 616 | break; |
582 | 617 | ||
618 | case FITRIM: | ||
619 | error = ioctl_fstrim(filp, argp); | ||
620 | break; | ||
621 | |||
583 | case FS_IOC_FIEMAP: | 622 | case FS_IOC_FIEMAP: |
584 | return ioctl_fiemap(filp, arg); | 623 | return ioctl_fiemap(filp, arg); |
585 | 624 | ||
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6571a056e55d..6a79fd0a1a32 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
299 | transaction->t_chp_stats.cs_forced_to_close++; | 299 | transaction->t_chp_stats.cs_forced_to_close++; |
300 | spin_unlock(&journal->j_list_lock); | 300 | spin_unlock(&journal->j_list_lock); |
301 | jbd_unlock_bh_state(bh); | 301 | jbd_unlock_bh_state(bh); |
302 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | ||
303 | /* | ||
304 | * The journal thread is dead; so starting and | ||
305 | * waiting for a commit to finish will cause | ||
306 | * us to wait for a _very_ long time. | ||
307 | */ | ||
308 | printk(KERN_ERR "JBD2: %s: " | ||
309 | "Waiting for Godot: block %llu\n", | ||
310 | journal->j_devname, | ||
311 | (unsigned long long) bh->b_blocknr); | ||
302 | jbd2_log_start_commit(journal, tid); | 312 | jbd2_log_start_commit(journal, tid); |
303 | jbd2_log_wait_commit(journal, tid); | 313 | jbd2_log_wait_commit(journal, tid); |
304 | ret = 1; | 314 | ret = 1; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index bc6be8bda1cc..f3ad1598b201 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -26,7 +26,9 @@ | |||
26 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
27 | #include <linux/bio.h> | 27 | #include <linux/bio.h> |
28 | #include <linux/blkdev.h> | 28 | #include <linux/blkdev.h> |
29 | #include <linux/bitops.h> | ||
29 | #include <trace/events/jbd2.h> | 30 | #include <trace/events/jbd2.h> |
31 | #include <asm/system.h> | ||
30 | 32 | ||
31 | /* | 33 | /* |
32 | * Default IO end handler for temporary BJ_IO buffer_heads. | 34 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal, | |||
201 | spin_lock(&journal->j_list_lock); | 203 | spin_lock(&journal->j_list_lock); |
202 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | 204 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
203 | mapping = jinode->i_vfs_inode->i_mapping; | 205 | mapping = jinode->i_vfs_inode->i_mapping; |
204 | jinode->i_flags |= JI_COMMIT_RUNNING; | 206 | set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
205 | spin_unlock(&journal->j_list_lock); | 207 | spin_unlock(&journal->j_list_lock); |
206 | /* | 208 | /* |
207 | * submit the inode data buffers. We use writepage | 209 | * submit the inode data buffers. We use writepage |
@@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal, | |||
216 | spin_lock(&journal->j_list_lock); | 218 | spin_lock(&journal->j_list_lock); |
217 | J_ASSERT(jinode->i_transaction == commit_transaction); | 219 | J_ASSERT(jinode->i_transaction == commit_transaction); |
218 | commit_transaction->t_flushed_data_blocks = 1; | 220 | commit_transaction->t_flushed_data_blocks = 1; |
219 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | 221 | clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
222 | smp_mb__after_clear_bit(); | ||
220 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | 223 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); |
221 | } | 224 | } |
222 | spin_unlock(&journal->j_list_lock); | 225 | spin_unlock(&journal->j_list_lock); |
@@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, | |||
237 | /* For locking, see the comment in journal_submit_data_buffers() */ | 240 | /* For locking, see the comment in journal_submit_data_buffers() */ |
238 | spin_lock(&journal->j_list_lock); | 241 | spin_lock(&journal->j_list_lock); |
239 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | 242 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
240 | jinode->i_flags |= JI_COMMIT_RUNNING; | 243 | set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
241 | spin_unlock(&journal->j_list_lock); | 244 | spin_unlock(&journal->j_list_lock); |
242 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); | 245 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); |
243 | if (err) { | 246 | if (err) { |
@@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, | |||
253 | ret = err; | 256 | ret = err; |
254 | } | 257 | } |
255 | spin_lock(&journal->j_list_lock); | 258 | spin_lock(&journal->j_list_lock); |
256 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | 259 | clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
260 | smp_mb__after_clear_bit(); | ||
257 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | 261 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); |
258 | } | 262 | } |
259 | 263 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 168d1894317a..538417c1fdbb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -42,12 +42,14 @@ | |||
42 | #include <linux/log2.h> | 42 | #include <linux/log2.h> |
43 | #include <linux/vmalloc.h> | 43 | #include <linux/vmalloc.h> |
44 | #include <linux/backing-dev.h> | 44 | #include <linux/backing-dev.h> |
45 | #include <linux/bitops.h> | ||
45 | 46 | ||
46 | #define CREATE_TRACE_POINTS | 47 | #define CREATE_TRACE_POINTS |
47 | #include <trace/events/jbd2.h> | 48 | #include <trace/events/jbd2.h> |
48 | 49 | ||
49 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
50 | #include <asm/page.h> | 51 | #include <asm/page.h> |
52 | #include <asm/system.h> | ||
51 | 53 | ||
52 | EXPORT_SYMBOL(jbd2_journal_extend); | 54 | EXPORT_SYMBOL(jbd2_journal_extend); |
53 | EXPORT_SYMBOL(jbd2_journal_stop); | 55 | EXPORT_SYMBOL(jbd2_journal_stop); |
@@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal, | |||
2210 | restart: | 2212 | restart: |
2211 | spin_lock(&journal->j_list_lock); | 2213 | spin_lock(&journal->j_list_lock); |
2212 | /* Is commit writing out inode - we have to wait */ | 2214 | /* Is commit writing out inode - we have to wait */ |
2213 | if (jinode->i_flags & JI_COMMIT_RUNNING) { | 2215 | if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) { |
2214 | wait_queue_head_t *wq; | 2216 | wait_queue_head_t *wq; |
2215 | DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); | 2217 | DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); |
2216 | wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); | 2218 | wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f3479d6e0a83..6bf0a242613e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -156,6 +156,7 @@ alloc_transaction: | |||
156 | */ | 156 | */ |
157 | repeat: | 157 | repeat: |
158 | read_lock(&journal->j_state_lock); | 158 | read_lock(&journal->j_state_lock); |
159 | BUG_ON(journal->j_flags & JBD2_UNMOUNT); | ||
159 | if (is_journal_aborted(journal) || | 160 | if (is_journal_aborted(journal) || |
160 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { | 161 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { |
161 | read_unlock(&journal->j_state_lock); | 162 | read_unlock(&journal->j_state_lock); |