diff options
author | Lukas Czerner <lczerner@redhat.com> | 2010-10-27 21:30:05 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 21:30:05 -0400 |
commit | bfff68738f1cb5c93dab1114634cea02aae9e7ba (patch) | |
tree | b6cdf3f26e86464c7088cab62d837eb32f559fb9 /fs/ext4/ext4.h | |
parent | e6fa0be699449d28a20e815bfe9ce26725ec4962 (diff) |
ext4: add support for lazy inode table initialization
When the lazy_itable_init extended option is passed to mke2fs, it
considerably speeds up filesystem creation because inode tables are
not zeroed out. The fact that parts of the inode table are
uninitialized is not a problem so long as the block group descriptors,
which contain information regarding how much of the inode table has
been initialized, have not been corrupted. However, if the block group
checksums are not valid, e2fsck must scan the entire inode table, and
the old, uninitialized data could potentially cause e2fsck to
report false problems.
Hence, it is important for the inode tables to be initialized as soon
as possible. This commit adds this feature so that mke2fs can safely
use the lazy inode table initialization feature to speed up formatting
file systems.
This is done via a new kernel thread called ext4lazyinit, which is
created on demand and destroyed when it is no longer needed. There
is only one thread for all ext4 filesystems in the system. When the
first filesystem with the init_itable mount option is mounted, the ext4lazyinit
thread is created, then the filesystem can register its request in the
request list.
This thread then walks through the list of requests picking up
scheduled requests and invoking ext4_init_inode_table(). Next schedule
time for the request is computed by multiplying the time it took to
zero out last inode table with wait multiplier, which can be set with
the (init_itable=n) mount option (default is 10). We are doing
this so we do not take the whole I/O bandwidth. When the thread is no
longer necessary (request list is empty) it frees the appropriate
structures and exits (and can be created later by another
filesystem).
We do not disturb regular inode allocations in any way; the allocator does not
care whether the inode table is zeroed or not. But when zeroing, we
have to skip used inodes, obviously. Also we should prevent new inode
allocations from the group while zeroing is under way. For that we
take write alloc_sem lock in ext4_init_inode_table() and read alloc_sem
in the ext4_claim_inode, so when we are unlucky and allocator hits the
group which is currently being zeroed, it just has to wait.
This can be suppressed using the mount option no_init_itable.
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/ext4.h')
-rw-r--r-- | fs/ext4/ext4.h | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b364b9df09b3..0fe078d368d0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -890,6 +890,7 @@ struct ext4_inode_info { | |||
890 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 890 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
891 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 891 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
892 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ | 892 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ |
893 | #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ | ||
893 | 894 | ||
894 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 895 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
895 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt | 896 | #define set_opt(o, opt) o |= EXT4_MOUNT_##opt |
@@ -1173,6 +1174,11 @@ struct ext4_sb_info { | |||
1173 | 1174 | ||
1174 | /* timer for periodic error stats printing */ | 1175 | /* timer for periodic error stats printing */ |
1175 | struct timer_list s_err_report; | 1176 | struct timer_list s_err_report; |
1177 | |||
1178 | /* Lazy inode table initialization info */ | ||
1179 | struct ext4_li_request *s_li_request; | ||
1180 | /* Wait multiplier for lazy initialization thread */ | ||
1181 | unsigned int s_li_wait_mult; | ||
1176 | }; | 1182 | }; |
1177 | 1183 | ||
1178 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1184 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1537,6 +1543,38 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | |||
1537 | extern struct proc_dir_entry *ext4_proc_root; | 1543 | extern struct proc_dir_entry *ext4_proc_root; |
1538 | 1544 | ||
1539 | /* | 1545 | /* |
1546 | * Timeout and state flag for lazy initialization inode thread. | ||
1547 | */ | ||
1548 | #define EXT4_DEF_LI_WAIT_MULT 10 | ||
1549 | #define EXT4_DEF_LI_MAX_START_DELAY 5 | ||
1550 | #define EXT4_LAZYINIT_QUIT 0x0001 | ||
1551 | #define EXT4_LAZYINIT_RUNNING 0x0002 | ||
1552 | |||
1553 | /* | ||
1554 | * Lazy inode table initialization info | ||
1555 | */ | ||
1556 | struct ext4_lazy_init { | ||
1557 | unsigned long li_state; | ||
1558 | |||
1559 | wait_queue_head_t li_wait_daemon; | ||
1560 | wait_queue_head_t li_wait_task; | ||
1561 | struct timer_list li_timer; | ||
1562 | struct task_struct *li_task; | ||
1563 | |||
1564 | struct list_head li_request_list; | ||
1565 | struct mutex li_list_mtx; | ||
1566 | }; | ||
1567 | |||
1568 | struct ext4_li_request { | ||
1569 | struct super_block *lr_super; | ||
1570 | struct ext4_sb_info *lr_sbi; | ||
1571 | ext4_group_t lr_next_group; | ||
1572 | struct list_head lr_request; | ||
1573 | unsigned long lr_next_sched; | ||
1574 | unsigned long lr_timeout; | ||
1575 | }; | ||
1576 | |||
1577 | /* | ||
1540 | * Function prototypes | 1578 | * Function prototypes |
1541 | */ | 1579 | */ |
1542 | 1580 | ||
@@ -1611,6 +1649,8 @@ extern unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
1611 | ext4_group_t group, | 1649 | ext4_group_t group, |
1612 | struct ext4_group_desc *desc); | 1650 | struct ext4_group_desc *desc); |
1613 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | 1651 | extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
1652 | extern int ext4_init_inode_table(struct super_block *sb, | ||
1653 | ext4_group_t group, int barrier); | ||
1614 | 1654 | ||
1615 | /* mballoc.c */ | 1655 | /* mballoc.c */ |
1616 | extern long ext4_mb_stats; | 1656 | extern long ext4_mb_stats; |