aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/ext4.h
diff options
context:
space:
mode:
author: Lukas Czerner <lczerner@redhat.com> 2010-10-27 21:30:05 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2010-10-27 21:30:05 -0400
commit: bfff68738f1cb5c93dab1114634cea02aae9e7ba (patch)
tree: b6cdf3f26e86464c7088cab62d837eb32f559fb9 /fs/ext4/ext4.h
parent: e6fa0be699449d28a20e815bfe9ce26725ec4962 (diff)
ext4: add support for lazy inode table initialization
When the lazy_itable_init extended option is passed to mke2fs, it considerably speeds up filesystem creation because inode tables are not zeroed out. The fact that parts of the inode table are uninitialized is not a problem so long as the block group descriptors, which contain information regarding how much of the inode table has been initialized, have not been corrupted. However, if the block group checksums are not valid, e2fsck must scan the entire inode table, and the old, uninitialized data could potentially cause e2fsck to report false problems. Hence, it is important for the inode tables to be initialized as soon as possible. This commit adds this feature so that mke2fs can safely use the lazy inode table initialization feature to speed up formatting file systems. This is done via a new kernel thread called ext4lazyinit, which is created on demand and destroyed when it is no longer needed. There is only one thread for all ext4 filesystems in the system. When the first filesystem with the init_itable mount option is mounted, the ext4lazyinit thread is created, and the filesystem can then register its request in the request list. This thread then walks through the list of requests, picking up scheduled requests and invoking ext4_init_inode_table(). The next schedule time for a request is computed by multiplying the time it took to zero out the last inode table by the wait multiplier, which can be set with the (init_itable=n) mount option (default is 10). We are doing this so we do not take the whole I/O bandwidth. When the thread is no longer necessary (the request list is empty), it frees the appropriate structures and exits (and can be created later by another filesystem). We do not disturb regular inode allocations in any way; the allocator simply does not care whether the inode table is or is not zeroed. But when zeroing, we have to skip used inodes, obviously. Also, we should prevent new inode allocations from the group while zeroing is under way.
For that we take the alloc_sem write lock in ext4_init_inode_table() and the alloc_sem read lock in ext4_claim_inode(), so when we are unlucky and the allocator hits a group which is currently being zeroed, it just has to wait. This can be suppressed using the mount option no_init_itable. Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/ext4.h')
-rw-r--r-- fs/ext4/ext4.h 40
1 files changed, 40 insertions, 0 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b364b9df09b3..0fe078d368d0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -890,6 +890,7 @@ struct ext4_inode_info {
890#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 890#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
891#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 891#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
892#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 892#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
893#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
893 894
894#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 895#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
895#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 896#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1173,6 +1174,11 @@ struct ext4_sb_info {
1173 1174
1174 /* timer for periodic error stats printing */ 1175 /* timer for periodic error stats printing */
1175 struct timer_list s_err_report; 1176 struct timer_list s_err_report;
1177
1178 /* Lazy inode table initialization info */
1179 struct ext4_li_request *s_li_request;
1180 /* Wait multiplier for lazy initialization thread */
1181 unsigned int s_li_wait_mult;
1176}; 1182};
1177 1183
1178static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1184static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1537,6 +1543,38 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
1537extern struct proc_dir_entry *ext4_proc_root; 1543extern struct proc_dir_entry *ext4_proc_root;
1538 1544
1539/* 1545/*
1546 * Timeout and state flag for lazy initialization inode thread.
1547 */
1548#define EXT4_DEF_LI_WAIT_MULT 10
1549#define EXT4_DEF_LI_MAX_START_DELAY 5
1550#define EXT4_LAZYINIT_QUIT 0x0001
1551#define EXT4_LAZYINIT_RUNNING 0x0002
1552
1553/*
1554 * Lazy inode table initialization info
1555 */
1556struct ext4_lazy_init {
1557 unsigned long li_state;
1558
1559 wait_queue_head_t li_wait_daemon;
1560 wait_queue_head_t li_wait_task;
1561 struct timer_list li_timer;
1562 struct task_struct *li_task;
1563
1564 struct list_head li_request_list;
1565 struct mutex li_list_mtx;
1566};
1567
1568struct ext4_li_request {
1569 struct super_block *lr_super;
1570 struct ext4_sb_info *lr_sbi;
1571 ext4_group_t lr_next_group;
1572 struct list_head lr_request;
1573 unsigned long lr_next_sched;
1574 unsigned long lr_timeout;
1575};
1576
1577/*
1540 * Function prototypes 1578 * Function prototypes
1541 */ 1579 */
1542 1580
@@ -1611,6 +1649,8 @@ extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
1611 ext4_group_t group, 1649 ext4_group_t group,
1612 struct ext4_group_desc *desc); 1650 struct ext4_group_desc *desc);
1613extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); 1651extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1652extern int ext4_init_inode_table(struct super_block *sb,
1653 ext4_group_t group, int barrier);
1614 1654
1615/* mballoc.c */ 1655/* mballoc.c */
1616extern long ext4_mb_stats; 1656extern long ext4_mb_stats;