From 8cef107a1d890ff76c85d665083ff3562be01d32 Mon Sep 17 00:00:00 2001 From: Frans van de Wiel Date: Mon, 15 Mar 2010 19:29:34 +0100 Subject: ext3: Avoid loading bitmaps for full groups during block allocation There is no point in loading bitmap for groups which are completely full. This causes noticeable performance problems (and memory pressure) on small systems with large full filesystem (http://marc.info/?l=linux-ext4&m=126843108314310&w=2). Jan Kara: Added a comment and changed check to use cpu-endian value. Signed-off-by: "Frans van de Wiel" Signed-off-by: Jan Kara --- fs/ext3/balloc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a177122a1b25..4a32511f4ded 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1583,6 +1583,12 @@ retry_alloc: if (!gdp) goto io_error; free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * skip this group (and avoid loading bitmap) if there + * are no free blocks + */ + if (!free_blocks) + continue; /* * skip this group if the number of * free blocks is less than half of the reservation -- cgit v1.2.2 From 46891532370e862d6bddedef9e6ca22a59a51fa4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 29 Mar 2010 13:55:39 +0200 Subject: ext2: Avoid loading bitmaps for full groups during block allocation There is no point in loading bitmap for groups which are completely full. This causes noticeable performance problems (and memory pressure) on small systems with large full filesystem (http://marc.info/?l=linux-ext4&m=126843108314310&w=2). Port of the same ext3 patch. 
Signed-off-by: Jan Kara --- fs/ext2/balloc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 3cf038c055d7..e8766a396776 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1331,6 +1331,12 @@ retry_alloc: goto io_error; free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * skip this group (and avoid loading bitmap) if there + * are no free blocks + */ + if (!free_blocks) + continue; /* * skip this group if the number of * free blocks is less than half of the reservation -- cgit v1.2.2 From eabf290d1470921f0ce5a9b22464ae30646a0677 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 27 Mar 2010 15:15:38 +0300 Subject: quota: optimize mark_dirty logic - Skip locking if quota is dirty already. - Return old quota state to help fs-specciffic implementation to optimize case where quota was dirty already. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/quota/dquot.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 788b5802a7ce..05c590e10ac2 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -317,14 +317,23 @@ static inline int mark_dquot_dirty(struct dquot *dquot) return dquot->dq_sb->dq_op->mark_dirty(dquot); } +/* Mark dquot dirty in atomic manner, and return it's old dirty flag state */ int dquot_mark_dquot_dirty(struct dquot *dquot) { + int ret = 1; + + /* If quota is dirty already, we don't have to acquire dq_list_lock */ + if (test_bit(DQ_MOD_B, &dquot->dq_flags)) + return 1; + spin_lock(&dq_list_lock); - if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) + if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> info[dquot->dq_type].dqi_dirty_list); + ret = 0; + } spin_unlock(&dq_list_lock); - return 0; + return ret; } EXPORT_SYMBOL(dquot_mark_dquot_dirty); -- cgit v1.2.2 From 524e4a1d102bdcee37297c0b763e945827b33ab8 Mon Sep 17 00:00:00 2001 From: Francis 
Moreau Date: Thu, 8 Apr 2010 11:35:17 +0200 Subject: ext2: remove useless call to brelse() in ext2_free_inode() This patch removes a useless call to brelse(bitmap_bh) since at that point bitmap_bh is NULL and slightly cleans up bitmap_bh handling. Signed-off-by: Francis Moreau Signed-off-by: Jan Kara --- fs/ext2/ialloc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index ad7d572ee8dc..f0c5286f9342 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -106,7 +106,7 @@ void ext2_free_inode (struct inode * inode) struct super_block * sb = inode->i_sb; int is_directory; unsigned long ino; - struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bitmap_bh; unsigned long block_group; unsigned long bit; struct ext2_super_block * es; @@ -135,14 +135,13 @@ void ext2_free_inode (struct inode * inode) ino > le32_to_cpu(es->s_inodes_count)) { ext2_error (sb, "ext2_free_inode", "reserved or nonexistent inode %lu", ino); - goto error_return; + return; } block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); - brelse(bitmap_bh); bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) - goto error_return; + return; /* Ok, now we can actually update the inode bitmaps.. */ if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), @@ -154,7 +153,7 @@ void ext2_free_inode (struct inode * inode) mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); -error_return: + brelse(bitmap_bh); } -- cgit v1.2.2 From 41d1a636b813867339db52e12377ca132d54700f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 12 Apr 2010 23:46:00 +0400 Subject: ext3: init statistics after journal recovery v2 Currently block/inode/dir counters are initialized before journal was recovered. In fact after journal recovery this info will probably change which results in incorrect numbers returned from statfs(2). 
BUG:#15768 Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext3/super.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 1bee604cc6cd..6b6e49de0916 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1890,21 +1890,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); - err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); - if (!err) { - err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); - } - if (!err) { - err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - } - if (err) { - ext3_msg(sb, KERN_ERR, "error: insufficient memory"); - goto failed_mount3; - } - /* per fileystem reservation list head & lock */ spin_lock_init(&sbi->s_rsv_window_lock); sbi->s_rsv_window_root = RB_ROOT; @@ -1945,15 +1930,29 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (!test_opt(sb, NOLOAD) && EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount3; + goto failed_mount2; } else if (journal_inum) { if (ext3_create_journal(sb, es, journal_inum)) - goto failed_mount3; + goto failed_mount2; } else { if (!silent) ext3_msg(sb, KERN_ERR, "error: no journal found. 
" "mounting ext3 over ext2?"); + goto failed_mount2; + } + err = percpu_counter_init(&sbi->s_freeblocks_counter, + ext3_count_free_blocks(sb)); + if (!err) { + err = percpu_counter_init(&sbi->s_freeinodes_counter, + ext3_count_free_inodes(sb)); + } + if (!err) { + err = percpu_counter_init(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + } + if (err) { + ext3_msg(sb, KERN_ERR, "error: insufficient memory"); goto failed_mount3; } @@ -1978,7 +1977,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) ext3_msg(sb, KERN_ERR, "error: journal does not support " "requested data journaling mode"); - goto failed_mount4; + goto failed_mount3; } default: break; @@ -2001,19 +2000,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (IS_ERR(root)) { ext3_msg(sb, KERN_ERR, "error: get root inode failed"); ret = PTR_ERR(root); - goto failed_mount4; + goto failed_mount3; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { iput(root); ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); - goto failed_mount4; + goto failed_mount3; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); iput(root); ret = -ENOMEM; - goto failed_mount4; + goto failed_mount3; } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -2039,12 +2038,11 @@ cantfind_ext3: sb->s_id); goto failed_mount; -failed_mount4: - journal_destroy(sbi->s_journal); failed_mount3: percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); + journal_destroy(sbi->s_journal); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); -- cgit v1.2.2 From 2b8120efb2d41e2aefce3b06cf3fd085f71e9021 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Apr 2010 14:38:33 +0200 Subject: ext2: Use ext2_clear_super_error() in ext2_sync_fs() ext2_sync_fs() used to duplicate the code from 
ext2_clear_super_error(). Signed-off-by: Jan Blunck Signed-off-by: Jan Kara --- fs/ext2/super.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 42e4a303b675..8e8b675ac202 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1120,8 +1120,8 @@ static void ext2_clear_super_error(struct super_block *sb) * be remapped. Nothing we can do but to retry the * write and hope for the best. */ - printk(KERN_ERR "EXT2-fs: %s previous I/O error to " - "superblock detected", sb->s_id); + ext2_msg(sb, KERN_ERR, + "previous I/O error to superblock detected\n"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } @@ -1161,23 +1161,9 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) static int ext2_sync_fs(struct super_block *sb, int wait) { struct ext2_super_block *es = EXT2_SB(sb)->s_es; - struct buffer_head *sbh = EXT2_SB(sb)->s_sbh; lock_kernel(); - if (buffer_write_io_error(sbh)) { - /* - * Oh, dear. A previous attempt to write the - * superblock failed. This could happen because the - * USB device was yanked out. Or it could happen to - * be a transient write error and maybe the block will - * be remapped. Nothing we can do but to retry the - * write and hope for the best. - */ - ext2_msg(sb, KERN_ERR, - "previous I/O error to superblock detected\n"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } + ext2_clear_super_error(sb); if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); -- cgit v1.2.2 From 269c8db30cf5b60f47a44bbceaac118b986895d8 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Apr 2010 14:38:34 +0200 Subject: ext2: Set the write time in ext2_sync_fs() This is probably a typo since the write time should actually be updated by ext2_sync_fs() instead of the mount time. 
Signed-off-by: Jan Blunck Signed-off-by: Jan Kara --- fs/ext2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 8e8b675ac202..b2050032424f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1172,7 +1172,7 @@ static int ext2_sync_fs(struct super_block *sb, int wait) cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); - es->s_mtime = cpu_to_le32(get_seconds()); + es->s_wtime = cpu_to_le32(get_seconds()); ext2_sync_super(sb, es); } else { ext2_commit_super(sb, es); -- cgit v1.2.2 From 20da9baf4cf9c627aaf7b00d64ce0b2221bab9bf Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Apr 2010 14:38:35 +0200 Subject: ext2: Remove duplicate code from ext2_sync_fs() Depending on the state (valid or unchecked) of the filesystem either ext2_sync_super() or ext2_commit_super() is called. If the filesystem is currently valid (it is checked), we first mark it unchecked and afterwards duplicate the work that ext2_sync_super() is doing later. Therefore this patch removes the duplicate code and calls ext2_sync_super() directly after marking the filesystem unchecked. 
Signed-off-by: Jan Blunck Signed-off-by: Jan Kara --- fs/ext2/super.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b2050032424f..09a88bf04579 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1163,16 +1163,9 @@ static int ext2_sync_fs(struct super_block *sb, int wait) struct ext2_super_block *es = EXT2_SB(sb)->s_es; lock_kernel(); - ext2_clear_super_error(sb); - if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); es->s_state &= cpu_to_le16(~EXT2_VALID_FS); - es->s_free_blocks_count = - cpu_to_le32(ext2_count_free_blocks(sb)); - es->s_free_inodes_count = - cpu_to_le32(ext2_count_free_inodes(sb)); - es->s_wtime = cpu_to_le32(get_seconds()); ext2_sync_super(sb, es); } else { ext2_commit_super(sb, es); -- cgit v1.2.2 From ee6921ebd04cb807dfe88b10ad80f1124813c673 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Apr 2010 14:38:36 +0200 Subject: ext2: Fold ext2_commit_super() into ext2_sync_super() Both functions originally did similar things except that ext2_sync_super() is returning after the call to sync_dirty_buffer(sbh). Therefore this patch adds a wait flag to tell ext2_sync_super() if it has to call sync_dirty_buffer() to wait for in-progress I/O to finish. 
Signed-off-by: Jan Blunck Signed-off-by: Jan Kara --- fs/ext2/super.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 09a88bf04579..a304c544571f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -39,7 +39,7 @@ #include "xip.h" static void ext2_sync_super(struct super_block *sb, - struct ext2_super_block *es); + struct ext2_super_block *es, int wait); static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); @@ -54,7 +54,7 @@ void ext2_error (struct super_block * sb, const char * function, if (!(sb->s_flags & MS_RDONLY)) { sbi->s_mount_state |= EXT2_ERROR_FS; es->s_state |= cpu_to_le16(EXT2_ERROR_FS); - ext2_sync_super(sb, es); + ext2_sync_super(sb, es, 1); } va_start(args, fmt); @@ -125,7 +125,7 @@ static void ext2_put_super (struct super_block * sb) struct ext2_super_block *es = sbi->s_es; es->s_state = cpu_to_le16(sbi->s_mount_state); - ext2_sync_super(sb, es); + ext2_sync_super(sb, es, 1); } db_count = sbi->s_gdb_count; for (i = 0; i < db_count; i++) @@ -1127,23 +1127,16 @@ static void ext2_clear_super_error(struct super_block *sb) } } -static void ext2_commit_super (struct super_block * sb, - struct ext2_super_block * es) -{ - ext2_clear_super_error(sb); - es->s_wtime = cpu_to_le32(get_seconds()); - mark_buffer_dirty(EXT2_SB(sb)->s_sbh); - sb->s_dirt = 0; -} - -static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) +static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait) { ext2_clear_super_error(sb); es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); es->s_wtime = cpu_to_le32(get_seconds()); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); - sync_dirty_buffer(EXT2_SB(sb)->s_sbh); + if (wait) 
+ sync_dirty_buffer(EXT2_SB(sb)->s_sbh); sb->s_dirt = 0; } @@ -1166,11 +1159,8 @@ static int ext2_sync_fs(struct super_block *sb, int wait) if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); es->s_state &= cpu_to_le16(~EXT2_VALID_FS); - ext2_sync_super(sb, es); - } else { - ext2_commit_super(sb, es); } - sb->s_dirt = 0; + ext2_sync_super(sb, es, wait); unlock_kernel(); return 0; @@ -1268,7 +1258,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) if (!ext2_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; } - ext2_sync_super(sb, es); + ext2_sync_super(sb, es, 1); unlock_kernel(); return 0; restore_opts: -- cgit v1.2.2 From 4c96a68bfc110d87b28bcee4c395a7b4d26ed67a Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Apr 2010 14:38:37 +0200 Subject: ext2: Move ext2_write_super() out of ext2_setup_super() Move ext2_write_super() out of ext2_setup_super() as a preparation for the next patch that adds a new lock for superblock fields. 
Signed-off-by: Jan Blunck Signed-off-by: Jan Kara --- fs/ext2/super.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index a304c544571f..f28a7ad02af9 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -606,7 +606,6 @@ static int ext2_setup_super (struct super_block * sb, if (!le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); - ext2_write_super(sb); if (test_opt (sb, DEBUG)) ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " "bpg=%lu, ipg=%lu, mo=%04lx]", @@ -1079,7 +1078,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) ext2_msg(sb, KERN_WARNING, "warning: mounting ext3 filesystem as ext2"); - ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); + if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY)) + sb->s_flags |= MS_RDONLY; + ext2_write_super(sb); return 0; cantfind_ext2: @@ -1238,6 +1239,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) */ es->s_state = cpu_to_le16(sbi->s_mount_state); es->s_mtime = cpu_to_le32(get_seconds()); + ext2_sync_super(sb, es, 1); } else { __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP); @@ -1257,8 +1259,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext2_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; + ext2_write_super(sb); } - ext2_sync_super(sb, es, 1); unlock_kernel(); return 0; restore_opts: -- cgit v1.2.2 From c15271f4e74cd6dbdf461335d6d1450949c4b956 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Apr 2010 14:38:38 +0200 Subject: ext2: Add ext2_sb_info s_lock spinlock Add a spinlock that protects against concurrent modifications of s_mount_state, s_blocks_last, s_overhead_last and the content of the superblock's buffer pointed to by 
sbi->s_es. The spinlock is now used in ext2_xattr_update_super_block() which was setting the EXT2_FEATURE_COMPAT_EXT_ATTR flag on the superblock without protection before. Likewise the spinlock is used in ext2_show_options() to have a consistent view of the mount options. This is a preparation patch for removing the BKL from ext2 in the next patch. Signed-off-by: Jan Blunck Cc: Andi Kleen Cc: Jan Kara Cc: OGAWA Hirofumi Signed-off-by: Jan Kara --- fs/ext2/inode.c | 2 ++ fs/ext2/super.c | 27 ++++++++++++++++++++++++++- fs/ext2/xattr.c | 2 ++ include/linux/ext2_fs_sb.h | 9 +++++++++ 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index fc13cc119aad..5d15442abbd0 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1407,9 +1407,11 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) * created, add a flag to the superblock. */ lock_kernel(); + spin_lock(&EXT2_SB(sb)->s_lock); ext2_update_dynamic_rev(sb); EXT2_SET_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_LARGE_FILE); + spin_unlock(&EXT2_SB(sb)->s_lock); unlock_kernel(); ext2_write_super(sb); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f28a7ad02af9..28f65609589d 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -52,8 +52,10 @@ void ext2_error (struct super_block * sb, const char * function, struct ext2_super_block *es = sbi->s_es; if (!(sb->s_flags & MS_RDONLY)) { + spin_lock(&sbi->s_lock); sbi->s_mount_state |= EXT2_ERROR_FS; es->s_state |= cpu_to_le16(EXT2_ERROR_FS); + spin_unlock(&sbi->s_lock); ext2_sync_super(sb, es, 1); } @@ -84,6 +86,9 @@ void ext2_msg(struct super_block *sb, const char *prefix, va_end(args); } +/* + * This must be called with sbi->s_lock held. 
+ */ void ext2_update_dynamic_rev(struct super_block *sb) { struct ext2_super_block *es = EXT2_SB(sb)->s_es; @@ -124,7 +129,9 @@ static void ext2_put_super (struct super_block * sb) if (!(sb->s_flags & MS_RDONLY)) { struct ext2_super_block *es = sbi->s_es; + spin_lock(&sbi->s_lock); es->s_state = cpu_to_le16(sbi->s_mount_state); + spin_unlock(&sbi->s_lock); ext2_sync_super(sb, es, 1); } db_count = sbi->s_gdb_count; @@ -209,6 +216,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) struct ext2_super_block *es = sbi->s_es; unsigned long def_mount_opts; + spin_lock(&sbi->s_lock); def_mount_opts = le32_to_cpu(es->s_default_mount_opts); if (sbi->s_sb_block != 1) @@ -281,6 +289,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) if (!test_opt(sb, RESERVATION)) seq_puts(seq, ",noreservation"); + spin_unlock(&sbi->s_lock); return 0; } @@ -766,6 +775,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbi; sbi->s_sb_block = sb_block; + spin_lock_init(&sbi->s_lock); + /* * See what the current blocksize for the device is, and * use that as the blocksize. Otherwise (or if the blocksize @@ -1132,9 +1143,12 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, int wait) { ext2_clear_super_error(sb); + spin_lock(&EXT2_SB(sb)->s_lock); es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); es->s_wtime = cpu_to_le32(get_seconds()); + /* unlock before we do IO */ + spin_unlock(&EXT2_SB(sb)->s_lock); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); if (wait) sync_dirty_buffer(EXT2_SB(sb)->s_sbh); @@ -1151,16 +1165,18 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, * may have been checked while mounted and e2fsck may have * set s_state to EXT2_VALID_FS after some corrections. 
*/ - static int ext2_sync_fs(struct super_block *sb, int wait) { + struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = EXT2_SB(sb)->s_es; lock_kernel(); + spin_lock(&sbi->s_lock); if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); es->s_state &= cpu_to_le16(~EXT2_VALID_FS); } + spin_unlock(&sbi->s_lock); ext2_sync_super(sb, es, wait); unlock_kernel(); @@ -1186,6 +1202,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) int err; lock_kernel(); + spin_lock(&sbi->s_lock); /* Store the old options */ old_sb_flags = sb->s_flags; @@ -1224,12 +1241,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; } if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + spin_unlock(&sbi->s_lock); unlock_kernel(); return 0; } if (*flags & MS_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || !(sbi->s_mount_state & EXT2_VALID_FS)) { + spin_unlock(&sbi->s_lock); unlock_kernel(); return 0; } @@ -1239,6 +1258,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) */ es->s_state = cpu_to_le16(sbi->s_mount_state); es->s_mtime = cpu_to_le32(get_seconds()); + spin_unlock(&sbi->s_lock); ext2_sync_super(sb, es, 1); } else { __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, @@ -1259,6 +1279,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext2_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; + spin_unlock(&sbi->s_lock); ext2_write_super(sb); } unlock_kernel(); @@ -1268,6 +1289,7 @@ restore_opts: sbi->s_resuid = old_opts.s_resuid; sbi->s_resgid = old_opts.s_resgid; sb->s_flags = old_sb_flags; + spin_unlock(&sbi->s_lock); unlock_kernel(); return err; } @@ -1279,6 +1301,8 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) struct ext2_super_block *es = sbi->s_es; u64 fsid; + spin_lock(&sbi->s_lock); + if 
(test_opt (sb, MINIX_DF)) sbi->s_overhead_last = 0; else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { @@ -1333,6 +1357,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) le64_to_cpup((void *)es->s_uuid + sizeof(u64)); buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; + spin_unlock(&sbi->s_lock); return 0; } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index e44dc92609be..3b96045a00ce 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -345,7 +345,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb) if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) return; + spin_lock(&EXT2_SB(sb)->s_lock); EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); + spin_unlock(&EXT2_SB(sb)->s_lock); sb->s_dirt = 1; mark_buffer_dirty(EXT2_SB(sb)->s_sbh); } diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h index 1cdb66367c98..db4d9f586bb6 100644 --- a/include/linux/ext2_fs_sb.h +++ b/include/linux/ext2_fs_sb.h @@ -106,6 +106,15 @@ struct ext2_sb_info { spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; struct ext2_reserve_window_node s_rsv_window_head; + /* + * s_lock protects against concurrent modifications of s_mount_state, + * s_blocks_last, s_overhead_last and the content of superblock's + * buffer pointed to by sbi->s_es. + * + * Note: It is used in ext2_show_options() to provide a consistent view + * of the mount options. + */ + spinlock_t s_lock; }; static inline spinlock_t * -- cgit v1.2.2 From e0a5cbac029db69032758000c67465c2ed7a5736 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Wed, 14 Apr 2010 14:38:39 +0200 Subject: BKL: Remove BKL from ext2 filesystem The BKL is still used in ext2_put_super(), ext2_fill_super(), ext2_sync_fs() ext2_remount() and ext2_write_inode(). From these calls ext2_put_super(), ext2_fill_super() and ext2_remount() are protected against each other by the struct super_block s_umount rw semaphore. 
The call in ext2_write_inode() could only protect the modification of the ext2_sb_info through ext2_update_dynamic_rev() against concurrent ext2_sync_fs() or ext2_remount(). ext2_fill_super() and ext2_put_super() can be left out because you need a valid filesystem reference in all three cases, which you do not have when you are in one of these functions. If the BKL is only protecting the modification of the ext2_sb_info it can safely be removed since this is protected by the struct ext2_sb_info s_lock. Signed-off-by: Jan Blunck Cc: Jan Kara Signed-off-by: Jan Kara --- fs/ext2/inode.c | 3 --- fs/ext2/super.c | 13 ------------- 2 files changed, 16 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 5d15442abbd0..b90c3bf6e9ba 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -22,7 +22,6 @@ * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 */ -#include #include #include #include @@ -1406,13 +1405,11 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) /* If this is the first large file * created, add a flag to the superblock. 
*/ - lock_kernel(); spin_lock(&EXT2_SB(sb)->s_lock); ext2_update_dynamic_rev(sb); EXT2_SET_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_LARGE_FILE); spin_unlock(&EXT2_SB(sb)->s_lock); - unlock_kernel(); ext2_write_super(sb); } } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 28f65609589d..71e9eb1fa696 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -120,8 +119,6 @@ static void ext2_put_super (struct super_block * sb) int i; struct ext2_sb_info *sbi = EXT2_SB(sb); - lock_kernel(); - if (sb->s_dirt) ext2_write_super(sb); @@ -147,8 +144,6 @@ static void ext2_put_super (struct super_block * sb) sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); - - unlock_kernel(); } static struct kmem_cache * ext2_inode_cachep; @@ -1170,7 +1165,6 @@ static int ext2_sync_fs(struct super_block *sb, int wait) struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = EXT2_SB(sb)->s_es; - lock_kernel(); spin_lock(&sbi->s_lock); if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); @@ -1178,8 +1172,6 @@ static int ext2_sync_fs(struct super_block *sb, int wait) } spin_unlock(&sbi->s_lock); ext2_sync_super(sb, es, wait); - unlock_kernel(); - return 0; } @@ -1201,7 +1193,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) unsigned long old_sb_flags; int err; - lock_kernel(); spin_lock(&sbi->s_lock); /* Store the old options */ @@ -1242,14 +1233,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) } if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { spin_unlock(&sbi->s_lock); - unlock_kernel(); return 0; } if (*flags & MS_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || !(sbi->s_mount_state & EXT2_VALID_FS)) { spin_unlock(&sbi->s_lock); - unlock_kernel(); return 0; } /* @@ -1282,7 +1271,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) 
spin_unlock(&sbi->s_lock); ext2_write_super(sb); } - unlock_kernel(); return 0; restore_opts: sbi->s_mount_opt = old_opts.s_mount_opt; @@ -1290,7 +1278,6 @@ restore_opts: sbi->s_resgid = old_opts.s_resgid; sb->s_flags = old_sb_flags; spin_unlock(&sbi->s_lock); - unlock_kernel(); return err; } -- cgit v1.2.2 From 311b9549ed2bb0f2c2257781c3e88cb00505e80e Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 15 Apr 2010 00:56:58 +0200 Subject: ufs: add ufs speciffic ->setattr call generic setattr not longer responsible for quota transfer. use ufs_setattr for all ufs's inodes. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ufs/inode.c | 2 +- fs/ufs/namei.c | 2 +- fs/ufs/symlink.c | 8 ++++++++ fs/ufs/truncate.c | 2 +- fs/ufs/ufs.h | 2 ++ 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 80b68c3702d1..cffa756f1047 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -603,7 +603,7 @@ static void ufs_set_inode_ops(struct inode *inode) if (!inode->i_blocks) inode->i_op = &ufs_fast_symlink_inode_operations; else { - inode->i_op = &page_symlink_inode_operations; + inode->i_op = &ufs_symlink_inode_operations; inode->i_mapping->a_ops = &ufs_aops; } } else diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 118556243e7a..eabc02eb1294 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -148,7 +148,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ - inode->i_op = &page_symlink_inode_operations; + inode->i_op = &ufs_symlink_inode_operations; inode->i_mapping->a_ops = &ufs_aops; err = page_symlink(inode, symname, l); if (err) diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c index c0156eda44bc..d283628b4778 100644 --- a/fs/ufs/symlink.c +++ b/fs/ufs/symlink.c @@ -42,4 +42,12 @@ static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd) const struct inode_operations ufs_fast_symlink_inode_operations = { .readlink = 
generic_readlink, .follow_link = ufs_follow_link, + .setattr = ufs_setattr, +}; + +const struct inode_operations ufs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .setattr = ufs_setattr, }; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index d3b6270cb377..ee8db3e77bfe 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -508,7 +508,7 @@ out: * - there is no way to know old size * - there is no way inform user about error, if it happens in `truncate' */ -static int ufs_setattr(struct dentry *dentry, struct iattr *attr) +int ufs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; unsigned int ia_valid = attr->ia_valid; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 43f9f5d5670e..179ae6b3180a 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -122,9 +122,11 @@ extern void ufs_panic (struct super_block *, const char *, const char *, ...) __ /* symlink.c */ extern const struct inode_operations ufs_fast_symlink_inode_operations; +extern const struct inode_operations ufs_symlink_inode_operations; /* truncate.c */ extern int ufs_truncate (struct inode *, loff_t); +extern int ufs_setattr(struct dentry *dentry, struct iattr *attr); static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) { -- cgit v1.2.2 From 03f4d804a1b4748885dc4613a4afe10089a731c8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Apr 2010 22:16:24 +0200 Subject: jbd: Provide function to check whether transaction will issue data barrier Provide a function which returns whether a transaction with given tid will send a barrier to the filesystem device. The function will be used by ext3 to detect whether fsync needs to send a separate barrier or not. 
Signed-off-by: Jan Kara --- fs/jbd/commit.c | 8 +++++++- fs/jbd/journal.c | 33 +++++++++++++++++++++++++++++++++ include/linux/jbd.h | 3 ++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index ecb44c94ba8d..28a9ddaa0c49 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -786,6 +786,12 @@ wait_for_iobuf: jbd_debug(3, "JBD: commit phase 6\n"); + /* All metadata is written, now write commit record and do cleanup */ + spin_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_COMMIT); + commit_transaction->t_state = T_COMMIT_RECORD; + spin_unlock(&journal->j_state_lock); + if (journal_write_commit_record(journal, commit_transaction)) err = -EIO; @@ -923,7 +929,7 @@ restart_loop: jbd_debug(3, "JBD: commit phase 8\n"); - J_ASSERT(commit_transaction->t_state == T_COMMIT); + J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD); commit_transaction->t_state = T_FINISHED; J_ASSERT(commit_transaction == journal->j_committing_transaction); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index bd224eec9b07..99c71940155a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -564,6 +564,38 @@ int log_wait_commit(journal_t *journal, tid_t tid) return err; } +/* + * Return 1 if a given transaction has not yet sent barrier request + * connected with a transaction commit. If 0 is returned, transaction + * may or may not have sent the barrier. Used to avoid sending barrier + * twice in common cases. + */ +int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid) +{ + int ret = 0; + transaction_t *commit_trans; + + if (!(journal->j_flags & JFS_BARRIER)) + return 0; + spin_lock(&journal->j_state_lock); + /* Transaction already committed? */ + if (tid_geq(journal->j_commit_sequence, tid)) + goto out; + /* + * Transaction is being committed and we already proceeded to + * writing commit record? 
+ */ + commit_trans = journal->j_committing_transaction; + if (commit_trans && commit_trans->t_tid == tid && + commit_trans->t_state >= T_COMMIT_RECORD) + goto out; + ret = 1; +out: + spin_unlock(&journal->j_state_lock); + return ret; +} +EXPORT_SYMBOL(journal_commit_will_send_barrier); + /* * Log buffer allocation routines: */ @@ -1157,6 +1189,7 @@ int journal_destroy(journal_t *journal) { int err = 0; + /* Wait for the commit thread to wake up and die. */ journal_kill_thread(journal); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 516a2a27e87a..e06965081ba5 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -427,9 +427,9 @@ struct transaction_s enum { T_RUNNING, T_LOCKED, - T_RUNDOWN, T_FLUSH, T_COMMIT, + T_COMMIT_RECORD, T_FINISHED } t_state; @@ -991,6 +991,7 @@ int journal_start_commit(journal_t *journal, tid_t *tid); int journal_force_commit_nested(journal_t *journal); int log_wait_commit(journal_t *journal, tid_t tid); int log_do_checkpoint(journal_t *journal); +int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid); void __log_wait_for_space(journal_t *journal); extern void __journal_drop_transaction(journal_t *, transaction_t *); -- cgit v1.2.2 From 5277970878a32e437b27296e34c592e5d351f11d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Apr 2010 22:24:26 +0200 Subject: ext3: Fix waiting on transaction during fsync log_start_commit() returns 1 only when it started a transaction commit. Thus in case transaction commit is already running, we fail to wait for the commit to finish. Fix the issue by always waiting for the commit regardless of the log_start_commit return value. 
Signed-off-by: Jan Kara --- fs/ext3/fsync.c | 20 +++++++++----------- fs/jbd/journal.c | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 8209f266e9ad..26289e8f4163 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -48,7 +48,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; struct ext3_inode_info *ei = EXT3_I(inode); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; - int ret = 0; + int ret, needs_barrier = 0; tid_t commit_tid; if (inode->i_sb->s_flags & MS_RDONLY) @@ -70,28 +70,26 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ - if (ext3_should_journal_data(inode)) { - ret = ext3_force_commit(inode->i_sb); - goto out; - } + if (ext3_should_journal_data(inode)) + return ext3_force_commit(inode->i_sb); if (datasync) commit_tid = atomic_read(&ei->i_datasync_tid); else commit_tid = atomic_read(&ei->i_sync_tid); - if (log_start_commit(journal, commit_tid)) { - log_wait_commit(journal, commit_tid); - goto out; - } + if (test_opt(inode->i_sb, BARRIER) && + !journal_trans_will_send_data_barrier(journal, commit_tid)) + needs_barrier = 1; + log_start_commit(journal, commit_tid); + ret = log_wait_commit(journal, commit_tid); /* * In case we didn't commit a transaction, we have to flush * disk caches manually so that data really is on persistent * storage */ - if (test_opt(inode->i_sb, BARRIER)) + if (needs_barrier) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); -out: return ret; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 99c71940155a..93d1e47647bd 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -594,7 +594,7 @@ out: spin_unlock(&journal->j_state_lock); return ret; } -EXPORT_SYMBOL(journal_commit_will_send_barrier); 
+EXPORT_SYMBOL(journal_trans_will_send_data_barrier); /* * Log buffer allocation routines: -- cgit v1.2.2 From da8d1ba22fa1fd0c0e541a43d75ebb062589b14b Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 26 Apr 2010 12:09:26 +0200 Subject: suppress warning: "quotatypes" defined but not used Suppress compilation warning: "quotatypes" defined but not used. quotatypes is used only when CONFIG_QUOTA_DEBUG or CONFIG_PRINT_QUOTA_WARNING is/are defined. Signed-off-by: Sergey Senozhatsky Signed-off-by: Jan Kara --- fs/quota/dquot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 05c590e10ac2..ae766056350d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -132,7 +132,9 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); +#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) static char *quotatypes[] = INITQFNAMES; +#endif static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; -- cgit v1.2.2 From dde9588853b1bde542eab247f8838c472806688f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 26 Apr 2010 20:03:33 +0400 Subject: quota: Make quota stat accounting lockless. Quota stats is mostly writable data structure. Let's alloc percpu bucket for each value. NOTE: dqstats_read() function is racy against dqstats_{inc,dec} and may return inconsistent value. But this is ok since absolute accuracy is not required. 
Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/quota/dquot.c | 102 +++++++++++++++++++++++++++++++++----------------- fs/quota/quota_tree.c | 4 +- fs/quota/quota_v1.c | 4 +- include/linux/quota.h | 42 +++++++++++++++++---- 4 files changed, 106 insertions(+), 46 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ae766056350d..01347e81d0ca 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -82,7 +82,7 @@ /* * There are three quota SMP locks. dq_list_lock protects all lists with quotas - * and quota formats, dqstats structure containing statistics about the lists + * and quota formats. * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly @@ -228,6 +228,10 @@ static struct hlist_head *dquot_hash; struct dqstats dqstats; EXPORT_SYMBOL(dqstats); +#ifdef CONFIG_SMP +struct dqstats *dqstats_pcpu; +EXPORT_SYMBOL(dqstats_pcpu); +#endif static qsize_t inode_get_rsv_space(struct inode *inode); static void __dquot_initialize(struct inode *inode, int type); @@ -275,7 +279,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, static inline void put_dquot_last(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &free_dquots); - dqstats.free_dquots++; + dqstats_inc(DQST_FREE_DQUOTS); } static inline void remove_free_dquot(struct dquot *dquot) @@ -283,7 +287,7 @@ static inline void remove_free_dquot(struct dquot *dquot) if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); - dqstats.free_dquots--; + dqstats_dec(DQST_FREE_DQUOTS); } static inline void put_inuse(struct dquot *dquot) @@ -291,12 +295,12 @@ static inline void put_inuse(struct dquot *dquot) /* We add to the back of inuse list so we don't have to restart * when traversing this list and we block */ list_add_tail(&dquot->dq_inuse, &inuse_list); - 
dqstats.allocated_dquots++; + dqstats_inc(DQST_ALLOC_DQUOTS); } static inline void remove_inuse(struct dquot *dquot) { - dqstats.allocated_dquots--; + dqstats_dec(DQST_ALLOC_DQUOTS); list_del(&dquot->dq_inuse); } /* @@ -561,8 +565,8 @@ int dquot_scan_active(struct super_block *sb, continue; /* Now we have active dquot so we can just increase use count */ atomic_inc(&dquot->dq_count); - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_LOOKUPS); dqput(old_dquot); old_dquot = dquot; ret = fn(dquot, priv); @@ -607,8 +611,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) * holding reference so we can safely just increase * use count */ atomic_inc(&dquot->dq_count); - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_LOOKUPS); sb->dq_op->write_dquot(dquot); dqput(dquot); spin_lock(&dq_list_lock); @@ -620,9 +624,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait) if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); - spin_lock(&dq_list_lock); - dqstats.syncs++; - spin_unlock(&dq_list_lock); + dqstats_inc(DQST_SYNCS); mutex_unlock(&dqopt->dqonoff_mutex); if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) @@ -674,6 +676,22 @@ static void prune_dqcache(int count) } } +static int dqstats_read(unsigned int type) +{ + int count = 0; +#ifdef CONFIG_SMP + int cpu; + for_each_possible_cpu(cpu) + count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type]; + /* Statistics reading is racy, but absolute accuracy isn't required */ + if (count < 0) + count = 0; +#else + count = dqstats.stat[type]; +#endif + return count; +} + /* * This is called from kswapd when we think we need some * more memory @@ -686,7 +704,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) prune_dqcache(nr); spin_unlock(&dq_list_lock); } - return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; + return (dqstats_read(DQST_FREE_DQUOTS)/100) * 
sysctl_vfs_cache_pressure; } static struct shrinker dqcache_shrinker = { @@ -714,10 +732,7 @@ void dqput(struct dquot *dquot) BUG(); } #endif - - spin_lock(&dq_list_lock); - dqstats.drops++; - spin_unlock(&dq_list_lock); + dqstats_inc(DQST_DROPS); we_slept: spin_lock(&dq_list_lock); if (atomic_read(&dquot->dq_count) > 1) { @@ -834,15 +849,15 @@ we_slept: put_inuse(dquot); /* hash it first so it can be found */ insert_dquot_hash(dquot); - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_LOOKUPS); } else { if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); atomic_inc(&dquot->dq_count); - dqstats.cache_hits++; - dqstats.lookups++; spin_unlock(&dq_list_lock); + dqstats_inc(DQST_CACHE_HITS); + dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is * already finished or it will be canceled due to dq_count > 1 test */ @@ -2476,62 +2491,74 @@ const struct quotactl_ops vfs_quotactl_ops = { .set_dqblk = vfs_set_dqblk }; + +static int do_proc_dqstats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ +#ifdef CONFIG_SMP + /* Update global table */ + unsigned int type = (int *)table->data - dqstats.stat; + dqstats.stat[type] = dqstats_read(type); +#endif + return proc_dointvec(table, write, buffer, lenp, ppos); +} + static ctl_table fs_dqstats_table[] = { { .procname = "lookups", - .data = &dqstats.lookups, + .data = &dqstats.stat[DQST_LOOKUPS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "drops", - .data = &dqstats.drops, + .data = &dqstats.stat[DQST_DROPS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "reads", - .data = &dqstats.reads, + .data = &dqstats.stat[DQST_READS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "writes", - .data = 
&dqstats.writes, + .data = &dqstats.stat[DQST_WRITES], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", - .data = &dqstats.cache_hits, + .data = &dqstats.stat[DQST_CACHE_HITS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", - .data = &dqstats.allocated_dquots, + .data = &dqstats.stat[DQST_ALLOC_DQUOTS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", - .data = &dqstats.free_dquots, + .data = &dqstats.stat[DQST_FREE_DQUOTS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, { .procname = "syncs", - .data = &dqstats.syncs, + .data = &dqstats.stat[DQST_SYNCS], .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = do_proc_dqstats, }, #ifdef CONFIG_PRINT_QUOTA_WARNING { @@ -2583,6 +2610,13 @@ static int __init dquot_init(void) if (!dquot_hash) panic("Cannot create dquot hash table"); +#ifdef CONFIG_SMP + dqstats_pcpu = alloc_percpu(struct dqstats); + if (!dqstats_pcpu) + panic("Cannot create dquot stats table"); +#endif + memset(&dqstats, 0, sizeof(struct dqstats)); + /* Find power-of-two hlist_heads which can fit into allocation */ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); dq_hash_bits = 0; diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index f81f4bcfb178..5b7f7416ec7a 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) } else { ret = 0; } - dqstats.writes++; + dqstats_inc(DQST_WRITES); kfree(ddquot); return ret; @@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) spin_unlock(&dq_data_lock); kfree(ddquot); out: - dqstats.reads++; + 
dqstats_inc(DQST_READS); return ret; } EXPORT_SYMBOL(qtree_read_dquot); diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 2ae757e9c008..4af344c5852a 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c @@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot) dquot->dq_dqb.dqb_ihardlimit == 0 && dquot->dq_dqb.dqb_isoftlimit == 0) set_bit(DQ_FAKE_B, &dquot->dq_flags); - dqstats.reads++; + dqstats_inc(DQST_READS); return 0; } @@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot) ret = 0; out: - dqstats.writes++; + dqstats_inc(DQST_WRITES); return ret; } diff --git a/include/linux/quota.h b/include/linux/quota.h index b462916b2a0a..cdfde10481b7 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -174,6 +174,8 @@ enum { #include #include #include +#include +#include #include #include @@ -238,19 +240,43 @@ static inline int info_dirty(struct mem_dqinfo *info) return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); } +enum { + DQST_LOOKUPS, + DQST_DROPS, + DQST_READS, + DQST_WRITES, + DQST_CACHE_HITS, + DQST_ALLOC_DQUOTS, + DQST_FREE_DQUOTS, + DQST_SYNCS, + _DQST_DQSTAT_LAST +}; + struct dqstats { - int lookups; - int drops; - int reads; - int writes; - int cache_hits; - int allocated_dquots; - int free_dquots; - int syncs; + int stat[_DQST_DQSTAT_LAST]; }; +extern struct dqstats *dqstats_pcpu; extern struct dqstats dqstats; +static inline void dqstats_inc(unsigned int type) +{ +#ifdef CONFIG_SMP + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++; +#else + dqstats.stat[type]++; +#endif +} + +static inline void dqstats_dec(unsigned int type) +{ +#ifdef CONFIG_SMP + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--; +#else + dqstats.stat[type]--; +#endif +} + #define DQ_MOD_B 0 /* dquot modified since read */ #define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */ #define DQ_INODES_B 2 /* uid/gid has been warned about inode limit */ -- cgit v1.2.2 From 0636c73ee7b129f77f577aaaefc8dde057be6d18 
Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 30 Apr 2010 11:09:34 -0500 Subject: ext3: make barrier options consistent with ext4 ext4 was updated to accept barrier/nobarrier mount options in addition to the older barrier=0/1. The barrier story is complex enough, we should help people by making the options the same at least, even if the defaults are different. This patch allows the barrier/nobarrier mount options for ext3, while keeping nobarrier the default. It also unconditionally displays barrier status in show_options, and prints a message at mount time if barriers are not enabled, just as ext4 does. Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- Documentation/filesystems/ext3.txt | 15 +++++++++++++-- fs/ext3/super.c | 33 ++++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 867c5b50cb42..272f80d5f966 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -59,8 +59,19 @@ commit=nrsec (*) Ext3 can be told to sync all its data and metadata Setting it to very large values will improve performance. -barrier=1 This enables/disables barriers. barrier=0 disables - it, barrier=1 enables it. +barrier=<0(*)|1> This enables/disables the use of write barriers in +barrier the jbd code. barrier=0 disables, barrier=1 enables. +nobarrier (*) This also requires an IO stack which can support + barriers, and if jbd gets an error on a barrier + write, it will disable again with a warning. + Write barriers enforce proper on-disk ordering + of journal commits, making volatile disk write caches + safe to use, at some performance penalty. If + your disks are battery-backed in one way or another, + disabling barriers may safely improve performance. + The mount options "barrier" and "nobarrier" can + also be used to enable or disable barriers, for + consistency with other ext3 mount options. 
orlov (*) This enables the new Orlov block allocator. It is enabled by default. diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6b6e49de0916..0fc1293d0e96 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -653,8 +653,12 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } - if (test_opt(sb, BARRIER)) - seq_puts(seq, ",barrier=1"); + + /* + * Always display barrier state so it's clear what the status is. + */ + seq_puts(seq, ",barrier="); + seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); @@ -810,8 +814,8 @@ enum { Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, - Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_usrquota, Opt_grpquota + Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, + Opt_resize, Opt_usrquota, Opt_grpquota }; static const match_table_t tokens = { @@ -865,6 +869,8 @@ static const match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, {Opt_resize, "resize"}, {Opt_err, NULL}, }; @@ -967,7 +973,11 @@ static int parse_options (char *options, struct super_block *sb, int token; if (!*p) continue; - + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. 
+ */ + args[0].to = args[0].from = 0; token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: @@ -1215,9 +1225,15 @@ set_qf_format: case Opt_abort: set_opt(sbi->s_mount_opt, ABORT); break; + case Opt_nobarrier: + clear_opt(sbi->s_mount_opt, BARRIER); + break; case Opt_barrier: - if (match_int(&args[0], &option)) - return 0; + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) set_opt(sbi->s_mount_opt, BARRIER); else @@ -2276,6 +2292,9 @@ static int ext3_load_journal(struct super_block *sb, return -EINVAL; } + if (!(journal->j_flags & JFS_BARRIER)) + printk(KERN_INFO "EXT3-fs: barriers not enabled\n"); + if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = journal_update_format(journal); if (err) { -- cgit v1.2.2 From b9b2dd36c1bc64430f8e13990ab135cbecc10076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 May 2010 17:04:58 -0400 Subject: quota: unify ->get_dqblk Pass the larger struct fs_disk_quota to the ->get_dqblk operation so that the Q_GETQUOTA and Q_XGETQUOTA operations can be implemented with a single filesystem operation and we can retire the ->get_xquota operation. The additional information (RT-subvolume accounting and warn counts) are left zero for the VFS quota implementation. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/gfs2/quota.c | 6 +++--- fs/quota/dquot.c | 27 ++++++++++++++++----------- fs/quota/quota.c | 23 ++++++++++++++++++----- fs/xfs/linux-2.6/xfs_quotaops.c | 4 ++-- include/linux/quota.h | 3 +-- include/linux/quotaops.h | 3 ++- 6 files changed, 42 insertions(+), 24 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index d5f4661287f9..dec93577a783 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1476,8 +1476,8 @@ static int gfs2_quota_get_xstate(struct super_block *sb, return 0; } -static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, - struct fs_disk_quota *fdq) +static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_lvb *qlvb; @@ -1629,7 +1629,7 @@ out_put: const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, - .get_xquota = gfs2_xquota_get, + .get_dqblk = gfs2_get_dqblk, .set_xquota = gfs2_xquota_set, }; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 01347e81d0ca..6aed77fc99c7 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2301,25 +2301,30 @@ static inline qsize_t stoqb(qsize_t space) } /* Generic routine for getting common part of quota structure */ -static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) +static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) { struct mem_dqblk *dm = &dquot->dq_dqb; + memset(di, 0, sizeof(*di)); + di->d_version = FS_DQUOT_VERSION; + di->d_flags = dquot->dq_type == USRQUOTA ? 
+ XFS_USER_QUOTA : XFS_GROUP_QUOTA; + di->d_id = dquot->dq_id; + spin_lock(&dq_data_lock); - di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit); - di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit); - di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace; - di->dqb_ihardlimit = dm->dqb_ihardlimit; - di->dqb_isoftlimit = dm->dqb_isoftlimit; - di->dqb_curinodes = dm->dqb_curinodes; - di->dqb_btime = dm->dqb_btime; - di->dqb_itime = dm->dqb_itime; - di->dqb_valid = QIF_ALL; + di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); + di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); + di->d_ino_hardlimit = dm->dqb_ihardlimit; + di->d_ino_softlimit = dm->dqb_isoftlimit; + di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace; + di->d_icount = dm->dqb_curinodes; + di->d_btimer = dm->dqb_btime; + di->d_itimer = dm->dqb_itime; spin_unlock(&dq_data_lock); } int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, - struct if_dqblk *di) + struct fs_disk_quota *di) { struct dquot *dquot; diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 95388f9b7356..8680e257c2bd 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -136,19 +136,32 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) return sb->s_qcop->set_info(sb, type, &info); } +static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) +{ + dst->dqb_bhardlimit = src->d_blk_hardlimit; + dst->dqb_bsoftlimit = src->d_blk_softlimit; + dst->dqb_curspace = src->d_bcount; + dst->dqb_ihardlimit = src->d_ino_hardlimit; + dst->dqb_isoftlimit = src->d_ino_softlimit; + dst->dqb_curinodes = src->d_icount; + dst->dqb_btime = src->d_btimer; + dst->dqb_itime = src->d_itimer; + dst->dqb_valid = QIF_ALL; +} + static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { + struct fs_disk_quota fdq; struct if_dqblk idq; int ret; - if (!sb_has_quota_active(sb, type)) - return -ESRCH; if (!sb->s_qcop->get_dqblk) return -ENOSYS; - ret = sb->s_qcop->get_dqblk(sb, type, id, 
&idq); + ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); if (ret) return ret; + copy_to_if_dqblk(&idq, &fdq); if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; return 0; @@ -210,9 +223,9 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, struct fs_disk_quota fdq; int ret; - if (!sb->s_qcop->get_xquota) + if (!sb->s_qcop->get_dqblk) return -ENOSYS; - ret = sb->s_qcop->get_xquota(sb, type, id, &fdq); + ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 1947514ce1ad..3d473f43c9a9 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -97,7 +97,7 @@ xfs_fs_set_xstate( } STATIC int -xfs_fs_get_xquota( +xfs_fs_get_dqblk( struct super_block *sb, int type, qid_t id, @@ -135,6 +135,6 @@ xfs_fs_set_xquota( const struct quotactl_ops xfs_quotactl_operations = { .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, - .get_xquota = xfs_fs_get_xquota, + .get_dqblk = xfs_fs_get_dqblk, .set_xquota = xfs_fs_set_xquota, }; diff --git a/include/linux/quota.h b/include/linux/quota.h index cdfde10481b7..42364219dc9b 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -337,11 +337,10 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); + int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); - int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); int (*set_xquota)(struct super_block *, 
int, qid_t, struct fs_disk_quota *); }; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index e6fa7acce290..d32a48631b0d 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -63,7 +63,8 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags); int vfs_quota_sync(struct super_block *sb, int type, int wait); int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *di); int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); int dquot_transfer(struct inode *inode, struct iattr *iattr); -- cgit v1.2.2 From c472b43275976512e4c1c32da5ced03f339cb380 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 May 2010 17:05:17 -0400 Subject: quota: unify ->set_dqblk Pass the larger struct fs_disk_quota to the ->set_dqblk operation so that the Q_SETQUOTA and Q_XSETQUOTA operations can be implemented with a single filesystem operation and we can retire the ->set_xquota operation. The additional information (RT-subvolume accounting and warn counts) are left zero for the VFS quota implementation. Add new fieldmask values for setting the number of blocks and inodes values which is required for the VFS quota, but wasn't for XFS. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/gfs2/quota.c | 6 ++-- fs/quota/dquot.c | 67 ++++++++++++++++++++++++++--------------- fs/quota/quota.c | 36 +++++++++++++++++++--- fs/xfs/linux-2.6/xfs_quotaops.c | 4 +-- fs/xfs/quota/xfs_qm_syscalls.c | 10 ++++-- include/linux/dqblk_xfs.h | 9 ++++++ include/linux/quota.h | 3 +- include/linux/quotaops.h | 3 +- 8 files changed, 98 insertions(+), 40 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index dec93577a783..49667d68769e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1521,8 +1521,8 @@ out: /* GFS2 only supports a subset of the XFS fields */ #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) -static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, - struct fs_disk_quota *fdq) +static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); @@ -1630,6 +1630,6 @@ const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, .get_dqblk = gfs2_get_dqblk, - .set_xquota = gfs2_xquota_set, + .set_dqblk = gfs2_set_dqblk, }; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 6aed77fc99c7..b1a5036560a9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2338,51 +2338,70 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, } EXPORT_SYMBOL(vfs_get_dqblk); +#define VFS_FS_DQ_MASK \ + (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ + FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \ + FS_DQ_BTIMER | FS_DQ_ITIMER) + /* Generic routine for setting common part of quota structure */ -static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) +static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; - if ((di->dqb_valid & 
QIF_BLIMITS && - (di->dqb_bhardlimit > dqi->dqi_maxblimit || - di->dqb_bsoftlimit > dqi->dqi_maxblimit)) || - (di->dqb_valid & QIF_ILIMITS && - (di->dqb_ihardlimit > dqi->dqi_maxilimit || - di->dqb_isoftlimit > dqi->dqi_maxilimit))) + if (di->d_fieldmask & ~VFS_FS_DQ_MASK) + return -EINVAL; + + if (((di->d_fieldmask & FS_DQ_BSOFT) && + (di->d_blk_softlimit > dqi->dqi_maxblimit)) || + ((di->d_fieldmask & FS_DQ_BHARD) && + (di->d_blk_hardlimit > dqi->dqi_maxblimit)) || + ((di->d_fieldmask & FS_DQ_ISOFT) && + (di->d_ino_softlimit > dqi->dqi_maxilimit)) || + ((di->d_fieldmask & FS_DQ_IHARD) && + (di->d_ino_hardlimit > dqi->dqi_maxilimit))) return -ERANGE; spin_lock(&dq_data_lock); - if (di->dqb_valid & QIF_SPACE) { - dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace; + if (di->d_fieldmask & FS_DQ_BCOUNT) { + dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_BLIMITS) { - dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit); - dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit); + + if (di->d_fieldmask & FS_DQ_BSOFT) + dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit); + if (di->d_fieldmask & FS_DQ_BHARD) + dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit); + if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_INODES) { - dm->dqb_curinodes = di->dqb_curinodes; + + if (di->d_fieldmask & FS_DQ_ICOUNT) { + dm->dqb_curinodes = di->d_icount; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_ILIMITS) { - dm->dqb_isoftlimit = di->dqb_isoftlimit; - dm->dqb_ihardlimit = di->dqb_ihardlimit; + + if (di->d_fieldmask & FS_DQ_ISOFT) + dm->dqb_isoftlimit = di->d_ino_softlimit; + if (di->d_fieldmask & FS_DQ_IHARD) + dm->dqb_ihardlimit = di->d_ino_hardlimit; + if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B 
+ QIF_ILIMITS_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_BTIME) { - dm->dqb_btime = di->dqb_btime; + + if (di->d_fieldmask & FS_DQ_BTIMER) { + dm->dqb_btime = di->d_btimer; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } - if (di->dqb_valid & QIF_ITIME) { - dm->dqb_itime = di->dqb_itime; + + if (di->d_fieldmask & FS_DQ_ITIMER) { + dm->dqb_itime = di->d_itimer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } @@ -2392,7 +2411,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) dm->dqb_curspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); - } else if (!(di->dqb_valid & QIF_BTIME)) + } else if (!(di->d_fieldmask & FS_DQ_BTIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; } @@ -2401,7 +2420,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) dm->dqb_curinodes < dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); - } else if (!(di->dqb_valid & QIF_ITIME)) + } else if (!(di->d_fieldmask & FS_DQ_ITIMER)) /* Set grace only if user hasn't provided his own... 
*/ dm->dqb_itime = get_seconds() + dqi->dqi_igrace; } @@ -2417,7 +2436,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) } int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, - struct if_dqblk *di) + struct fs_disk_quota *di) { struct dquot *dquot; int rc; diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 8680e257c2bd..d6ee49dda4fd 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -167,18 +167,44 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, return 0; } +static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src) +{ + dst->d_blk_hardlimit = src->dqb_bhardlimit; + dst->d_blk_softlimit = src->dqb_bsoftlimit; + dst->d_bcount = src->dqb_curspace; + dst->d_ino_hardlimit = src->dqb_ihardlimit; + dst->d_ino_softlimit = src->dqb_isoftlimit; + dst->d_icount = src->dqb_curinodes; + dst->d_btimer = src->dqb_btime; + dst->d_itimer = src->dqb_itime; + + dst->d_fieldmask = 0; + if (src->dqb_valid & QIF_BLIMITS) + dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD; + if (src->dqb_valid & QIF_SPACE) + dst->d_fieldmask |= FS_DQ_BCOUNT; + if (src->dqb_valid & QIF_ILIMITS) + dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD; + if (src->dqb_valid & QIF_INODES) + dst->d_fieldmask |= FS_DQ_ICOUNT; + if (src->dqb_valid & QIF_BTIME) + dst->d_fieldmask |= FS_DQ_BTIMER; + if (src->dqb_valid & QIF_ITIME) + dst->d_fieldmask |= FS_DQ_ITIMER; +} + static int quota_setquota(struct super_block *sb, int type, qid_t id, void __user *addr) { + struct fs_disk_quota fdq; struct if_dqblk idq; if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; - if (!sb_has_quota_active(sb, type)) - return -ESRCH; if (!sb->s_qcop->set_dqblk) return -ENOSYS; - return sb->s_qcop->set_dqblk(sb, type, id, &idq); + copy_from_if_dqblk(&fdq, &idq); + return sb->s_qcop->set_dqblk(sb, type, id, &fdq); } static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) @@ -212,9 +238,9 @@ static int quota_setxquota(struct 
super_block *sb, int type, qid_t id, if (copy_from_user(&fdq, addr, sizeof(fdq))) return -EFAULT; - if (!sb->s_qcop->set_xquota) + if (!sb->s_qcop->set_dqblk) return -ENOSYS; - return sb->s_qcop->set_xquota(sb, type, id, &fdq); + return sb->s_qcop->set_dqblk(sb, type, id, &fdq); } static int quota_getxquota(struct super_block *sb, int type, qid_t id, diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 3d473f43c9a9..e31bf21fe5d3 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -114,7 +114,7 @@ xfs_fs_get_dqblk( } STATIC int -xfs_fs_set_xquota( +xfs_fs_set_dqblk( struct super_block *sb, int type, qid_t id, @@ -136,5 +136,5 @@ const struct quotactl_ops xfs_quotactl_operations = { .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, .get_dqblk = xfs_fs_get_dqblk, - .set_xquota = xfs_fs_set_xquota, + .set_dqblk = xfs_fs_set_dqblk, }; diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 26fa43140f2e..92b002f1805f 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -448,6 +448,9 @@ xfs_qm_scall_getqstat( return 0; } +#define XFS_DQ_MASK \ + (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) + /* * Adjust quota limits, and start/stop timers accordingly. 
*/ @@ -465,9 +468,10 @@ xfs_qm_scall_setqlim( int error; xfs_qcnt_t hard, soft; - if ((newlim->d_fieldmask & - (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) - return (0); + if (newlim->d_fieldmask & ~XFS_DQ_MASK) + return EINVAL; + if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) + return 0; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, diff --git a/include/linux/dqblk_xfs.h b/include/linux/dqblk_xfs.h index 527504c11c5e..4389ae72024e 100644 --- a/include/linux/dqblk_xfs.h +++ b/include/linux/dqblk_xfs.h @@ -109,6 +109,15 @@ typedef struct fs_disk_quota { #define FS_DQ_RTBWARNS (1<<11) #define FS_DQ_WARNS_MASK (FS_DQ_BWARNS | FS_DQ_IWARNS | FS_DQ_RTBWARNS) +/* + * Accounting values. These can only be set for filesystem with + * non-transactional quotas that require quotacheck(8) in userspace. + */ +#define FS_DQ_BCOUNT (1<<12) +#define FS_DQ_ICOUNT (1<<13) +#define FS_DQ_RTBCOUNT (1<<14) +#define FS_DQ_ACCT_MASK (FS_DQ_BCOUNT | FS_DQ_ICOUNT | FS_DQ_RTBCOUNT) + /* * Various flags related to quotactl(2). Only relevant to XFS filesystems. 
*/ diff --git a/include/linux/quota.h b/include/linux/quota.h index 42364219dc9b..7126a15467f1 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -338,10 +338,9 @@ struct quotactl_ops { int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); - int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); + int (*set_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); - int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); }; struct quota_format_type { diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d32a48631b0d..82c70c42d035 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -65,7 +65,8 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct fs_disk_quota *di); -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *di); int dquot_transfer(struct inode *inode, struct iattr *iattr); int vfs_dq_quota_on_remount(struct super_block *sb); -- cgit v1.2.2 From fcbc59f96e38a0999e827be9d04d46b62b53b20a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 May 2010 12:35:40 -0400 Subject: quota: remove sb_has_quota_active in get/set_info The methods already do these checks, so remove them in the quotactl implementation to allow non-VFS quota implementations to also support these calls. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/quota/quota.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index d6ee49dda4fd..cfc78826da90 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -113,8 +113,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr) struct if_dqinfo info; int ret; - if (!sb_has_quota_active(sb, type)) - return -ESRCH; if (!sb->s_qcop->get_info) return -ENOSYS; ret = sb->s_qcop->get_info(sb, type, &info); @@ -129,8 +127,6 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) if (copy_from_user(&info, addr, sizeof(info))) return -EFAULT; - if (!sb_has_quota_active(sb, type)) - return -ESRCH; if (!sb->s_qcop->set_info) return -ENOSYS; return sb->s_qcop->set_info(sb, type, &info); -- cgit v1.2.2 From 12755627bdcddcdb30a1bfb9a09395a52b1d6838 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 8 Apr 2010 22:04:20 +0400 Subject: quota: unify quota init condition in setattr Quota must be initialized if a size or uid/gid change is requested. But initialization is performed in two different places: in case of i_size the file system is responsible for dquot init, but in case of uid/gid the init is called internally in dquot_transfer(). This ambiguity makes the code harder to understand. Let's move this logic to one common helper function.
Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext4/inode.c | 2 +- fs/jfs/file.c | 2 +- fs/ocfs2/file.c | 4 ++-- fs/quota/dquot.c | 5 ++--- fs/reiserfs/inode.c | 3 ++- fs/udf/file.c | 2 +- fs/ufs/truncate.c | 8 ++++---- include/linux/quotaops.h | 8 ++++++++ 10 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b90c3bf6e9ba..527c46d9bc1f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1466,7 +1466,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; - if (iattr->ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, iattr)) dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ea33bdf0a300..735f0190ec2a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3151,7 +3151,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if (ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, attr)) dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 81d605412844..3e0f6af9d08d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5425,7 +5425,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if (ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, attr)) dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 14ba982b3f24..85d9ec659225 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -98,7 +98,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) if (rc) return rc; - if (iattr->ia_valid & 
ATTR_SIZE) + if (is_quota_modification(inode, iattr)) dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f74f1400eccd..e127c53ec2e7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -966,10 +966,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (status) return status; + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; if (size_change) { - dquot_initialize(inode); - status = ocfs2_rw_lock(inode, 1); if (status < 0) { mlog_errno(status); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index b1a5036560a9..1056a21f0300 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1822,10 +1822,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) mask |= 1 << GRPQUOTA; chid[GRPQUOTA] = iattr->ia_gid; } - if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { - dquot_initialize(inode); + if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) return __dquot_transfer(inode, chid, mask); - } + return 0; } EXPORT_SYMBOL(dquot_transfer); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index dc2c65e04853..0f22fdaf54ac 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3076,9 +3076,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); depth = reiserfs_write_lock_once(inode->i_sb); - if (attr->ia_valid & ATTR_SIZE) { + if (is_quota_modification(inode, attr)) dquot_initialize(inode); + if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate */ diff --git a/fs/udf/file.c b/fs/udf/file.c index 4b6a46ccbf46..6ebc043f3a2a 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -227,7 +227,7 @@ int udf_setattr(struct dentry *dentry, struct 
iattr *iattr) if (error) return error; - if (iattr->ia_valid & ATTR_SIZE) + if (is_quota_modification(inode, iattr)) dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index ee8db3e77bfe..f294c44577dc 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -518,18 +518,18 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (is_quota_modification(inode, attr)) + dquot_initialize(inode); + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { error = dquot_transfer(inode, attr); if (error) return error; } - if (ia_valid & ATTR_SIZE && - attr->ia_size != i_size_read(inode)) { + if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { loff_t old_i_size = inode->i_size; - dquot_initialize(inode); - error = vmtruncate(inode, attr->ia_size); if (error) return error; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 82c70c42d035..8a7818764a67 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -14,6 +14,14 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) return &sb->s_dquot; } +/* i_mutex must be held */ +static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) +{ + return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) || + (ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || + (ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid); +} + #if defined(CONFIG_QUOTA) /* -- cgit v1.2.2 From bc8e5f07392f05c47c8bdeff4f7098db440d065c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 May 2010 19:58:50 +0200 Subject: quota: Refactor dquot_transfer code so that OCFS2 can pass in its references Currently, __dquot_transfer() acquires its own references of dquot structures that will be put into inode.
But for OCFS2, this creates a lock inversion between dq_lock (waited on in dqget) and transaction start (started in ocfs2_setattr). Currently, deadlock is impossible because dq_lock is acquired only during dquot_acquire and dquot_release and we already hold a reference to dquot structures in ocfs2_setattr so neither of these functions can be called while we call dquot_transfer. But this is rather subtle and it is hard to teach lockdep about it. So provide __dquot_transfer function that can be passed dquot references directly. OCFS2 can then pass acquired dquot references directly to __dquot_transfer with proper locking. Signed-off-by: Jan Kara --- fs/quota/dquot.c | 61 +++++++++++++++++++++--------------------------- include/linux/quotaops.h | 1 + 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 1056a21f0300..655a4c52b8c3 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1703,16 +1703,19 @@ EXPORT_SYMBOL(dquot_free_inode); /* * Transfer the number of inode and blocks from one diskquota to an other. + * On success, dquot references in transfer_to are consumed and references + * to original dquots that need to be released are placed there. On failure, + * references are kept untouched. * * This operation can block, but only after everything is updated * A transaction must be started when entering this function. 
+ * */ -static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) +int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { qsize_t space, cur_space; qsize_t rsv_space = 0; - struct dquot *transfer_from[MAXQUOTAS]; - struct dquot *transfer_to[MAXQUOTAS]; + struct dquot *transfer_from[MAXQUOTAS] = {}; int cnt, ret = 0; char warntype_to[MAXQUOTAS]; char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; @@ -1722,19 +1725,12 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask if (IS_NOQUOTA(inode)) return 0; /* Initialize the arrays */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - transfer_from[cnt] = NULL; - transfer_to[cnt] = NULL; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype_to[cnt] = QUOTA_NL_NOWARN; - } - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (mask & (1 << cnt)) - transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt); - } down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); if (IS_NOQUOTA(inode)) { /* File without quota accounting? 
*/ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - goto put_all; + return 0; } spin_lock(&dq_data_lock); cur_space = inode_get_bytes(inode); @@ -1786,46 +1782,41 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_to); - /* The reference we got is transferred to the inode */ + /* Pass back references to put */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) - transfer_to[cnt] = NULL; -warn_put_all: + transfer_to[cnt] = transfer_from[cnt]; +warn: flush_warnings(transfer_to, warntype_to); flush_warnings(transfer_from, warntype_from_inodes); flush_warnings(transfer_from, warntype_from_space); -put_all: - dqput_all(transfer_from); - dqput_all(transfer_to); return ret; over_quota: spin_unlock(&dq_data_lock); up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - /* Clear dquot pointers we don't want to dqput() */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - transfer_from[cnt] = NULL; - goto warn_put_all; + goto warn; } +EXPORT_SYMBOL(__dquot_transfer); /* Wrapper for transferring ownership of an inode for uid/gid only * Called from FSXXX_setattr() */ int dquot_transfer(struct inode *inode, struct iattr *iattr) { - qid_t chid[MAXQUOTAS]; - unsigned long mask = 0; + struct dquot *transfer_to[MAXQUOTAS] = {}; + struct super_block *sb = inode->i_sb; + int ret; - if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) { - mask |= 1 << USRQUOTA; - chid[USRQUOTA] = iattr->ia_uid; - } - if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) { - mask |= 1 << GRPQUOTA; - chid[GRPQUOTA] = iattr->ia_gid; - } - if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) - return __dquot_transfer(inode, chid, mask); + if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode)) + return 0; - return 0; + if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) + transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); + if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) + 
transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA); + + ret = __dquot_transfer(inode, transfer_to); + dqput_all(transfer_to); + return ret; } EXPORT_SYMBOL(dquot_transfer); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 8a7818764a67..370abb1e99cb 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -76,6 +76,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct fs_disk_quota *di); +int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); int dquot_transfer(struct inode *inode, struct iattr *iattr); int vfs_dq_quota_on_remount(struct super_block *sb); -- cgit v1.2.2 From f64dd44eb748438783b10b3f7a4968d2656a3c95 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 28 Apr 2010 00:22:30 +0200 Subject: ocfs2: Do not map blocks from local quota file on each write There is no need to map offset of local dquot structure to on disk block in each quota write. It is enough to map it just once and store the physical block number in quota structure in memory. Moreover this simplifies locking as we do not have to take ip_alloc_sem from quota write path.
Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/ocfs2/quota.h | 3 +++ fs/ocfs2/quota_global.c | 14 ++++++++++++++ fs/ocfs2/quota_local.c | 28 +++++++++++++++++++--------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 123bc520a2c0..e22d2935128e 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -23,6 +23,7 @@ struct ocfs2_dquot { struct dquot dq_dquot; /* Generic VFS dquot */ loff_t dq_local_off; /* Offset in the local quota file */ + u64 dq_local_phys_blk; /* Physical block carrying quota structure */ struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */ unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ s64 dq_origspace; /* Last globally synced space usage */ @@ -104,6 +105,8 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); int ocfs2_read_quota_block(struct inode *inode, u64 v_block, struct buffer_head **bh); +int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, + struct buffer_head **bh); extern const struct dquot_operations ocfs2_quota_operations; extern struct quota_format_type ocfs2_quota_format; diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 04ae76d8c6ab..f461f9678f9f 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -25,6 +25,7 @@ #include "dlmglue.h" #include "uptodate.h" #include "super.h" +#include "buffer_head_io.h" #include "quota.h" static struct workqueue_struct *ocfs2_quota_wq = NULL; @@ -137,6 +138,19 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block, return rc; } +int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, + struct buffer_head **bhp) +{ + int rc; + + *bhp = NULL; + rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, 1, bhp, 0, + ocfs2_validate_quota_block); + if (rc) + mlog_errno(rc); + return rc; +} + static int ocfs2_get_quota_block(struct inode 
*inode, int block, struct buffer_head **bh) { diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 884b641f199e..a88f1d1ec2b4 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -862,18 +862,17 @@ static int ocfs2_local_write_dquot(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); - struct buffer_head *bh = NULL; + struct buffer_head *bh; + struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type]; int status; - status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type], - ol_dqblk_file_block(sb, od->dq_local_off), - &bh); + status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk, + &bh); if (status) { mlog_errno(status); goto out; } - status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh, - olq_set_dquot, od); + status = ocfs2_modify_bh(lqinode, bh, olq_set_dquot, od); if (status < 0) { mlog_errno(status); goto out; @@ -1197,17 +1196,27 @@ static int ocfs2_create_local_dquot(struct dquot *dquot) struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); int offset; int status; + u64 pcount; + down_write(&OCFS2_I(lqinode)->ip_alloc_sem); chunk = ocfs2_find_free_entry(sb, type, &offset); if (!chunk) { chunk = ocfs2_extend_local_quota_file(sb, type, &offset); - if (IS_ERR(chunk)) - return PTR_ERR(chunk); + if (IS_ERR(chunk)) { + status = PTR_ERR(chunk); + goto out; + } } else if (IS_ERR(chunk)) { - return PTR_ERR(chunk); + status = PTR_ERR(chunk); + goto out; } od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset); od->dq_chunk = chunk; + status = ocfs2_extent_map_get_blocks(lqinode, + ol_dqblk_block(sb, chunk->qc_num, offset), + &od->dq_local_phys_blk, + &pcount, + NULL); /* Initialize dquot structure on disk */ status = ocfs2_local_write_dquot(dquot); @@ -1224,6 +1233,7 @@ static int ocfs2_create_local_dquot(struct dquot *dquot) goto out; } out: + up_write(&OCFS2_I(lqinode)->ip_alloc_sem); return status; } -- cgit v1.2.2 From 
ae4f6ef13417deaa49471c0e903914a3ef3be258 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 28 Apr 2010 19:04:29 +0200 Subject: ocfs2: Avoid unnecessary block mapping when refreshing quota info The position of global quota file info does not change. So we do not have to do logical -> physical block translation every time we reread it from disk. Thus we can also avoid taking ip_alloc_sem. Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/ocfs2/dlmglue.c | 3 ++- fs/ocfs2/quota.h | 3 ++- fs/ocfs2/quota_global.c | 15 +++++++++++++++ fs/ocfs2/quota_local.c | 10 +++++----- 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 50c4ee805da4..39eb16ac5f98 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3897,7 +3897,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) oinfo->dqi_gi.dqi_free_entry = be32_to_cpu(lvb->lvb_free_entry); } else { - status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); + status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, + oinfo->dqi_giblk, &bh); if (status) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index e22d2935128e..c7623430ca3c 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -52,8 +52,9 @@ struct ocfs2_mem_dqinfo { struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */ int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */ + u64 dqi_giblk; /* Number of block with global information header */ struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */ - struct buffer_head *dqi_ibh; /* Buffer with information header */ + struct buffer_head *dqi_libh; /* Buffer with local information header */ struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ struct delayed_work dqi_sync_work; /* Work for syncing dquots */ struct 
ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index f461f9678f9f..f391b11ea98c 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -325,6 +325,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type) struct ocfs2_global_disk_dqinfo dinfo; struct mem_dqinfo *info = sb_dqinfo(sb, type); struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + u64 pcount; int status; mlog_entry_void(); @@ -351,9 +352,19 @@ int ocfs2_global_read_info(struct super_block *sb, int type) mlog_errno(status); goto out_err; } + + status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk, + &pcount, NULL); + if (status < 0) + goto out_unlock; + + status = ocfs2_qinfo_lock(oinfo, 0); + if (status < 0) + goto out_unlock; status = sb->s_op->quota_read(sb, type, (char *)&dinfo, sizeof(struct ocfs2_global_disk_dqinfo), OCFS2_GLOBAL_INFO_OFF); + ocfs2_qinfo_unlock(oinfo, 0); ocfs2_unlock_global_qf(oinfo, 0); if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { mlog(ML_ERROR, "Cannot read global quota info (%d).\n", @@ -380,6 +391,10 @@ int ocfs2_global_read_info(struct super_block *sb, int type) out_err: mlog_exit(status); return status; +out_unlock: + ocfs2_unlock_global_qf(oinfo, 0); + mlog_errno(status); + goto out_err; } /* Write information to global quota file. 
Expects exlusive lock on quota diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index a88f1d1ec2b4..962e8380852b 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -671,7 +671,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) INIT_LIST_HEAD(&oinfo->dqi_chunk); oinfo->dqi_rec = NULL; oinfo->dqi_lqi_bh = NULL; - oinfo->dqi_ibh = NULL; + oinfo->dqi_libh = NULL; status = ocfs2_global_read_info(sb, type); if (status < 0) @@ -697,7 +697,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); - oinfo->dqi_ibh = bh; + oinfo->dqi_libh = bh; /* We crashed when using local quota file? */ if (!(info->dqi_flags & OLQF_CLEAN)) { @@ -759,7 +759,7 @@ static int ocfs2_local_write_info(struct super_block *sb, int type) { struct mem_dqinfo *info = sb_dqinfo(sb, type); struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv) - ->dqi_ibh; + ->dqi_libh; int status; status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info, @@ -820,7 +820,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) /* Mark local file as clean */ info->dqi_flags |= OLQF_CLEAN; status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], - oinfo->dqi_ibh, + oinfo->dqi_libh, olq_update_info, info); if (status < 0) { @@ -830,7 +830,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) out: ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1); - brelse(oinfo->dqi_ibh); + brelse(oinfo->dqi_libh); brelse(oinfo->dqi_lqi_bh); kfree(oinfo); return 0; -- cgit v1.2.2 From fb8dd8d780140a3f0e9074831a59054fec6cc451 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 31 Mar 2010 16:25:37 +0200 Subject: ocfs2: Fix quota locking OCFS2 had three issues with quota locking: a) When reading dquot from global quota file, we started a transaction while holding 
dqio_mutex which is prone to deadlocks because other paths do it the other way around b) During ocfs2_sync_dquot we were not protected against concurrent writers on the same node. Because we first copy data to local buffer, a race could happen resulting in old data being written to global quota file and thus causing quota inconsistency after a crash. c) ip_alloc_sem of quota files was acquired while a transaction is started in ocfs2_quota_write which can deadlock because we first get ip_alloc_sem and then start a transaction when extending quota files. We fix the problem a) by pulling all necessary code to ocfs2_acquire_dquot and ocfs2_release_dquot. Thus we no longer depend on generic dquot_acquire to do the locking and can force proper lock ordering. Problems b) and c) are fixed by locking i_mutex and ip_alloc_sem of global quota file in ocfs2_lock_global_qf and removing ip_alloc_sem from ocfs2_quota_read and ocfs2_quota_write. Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/ocfs2/quota.h | 5 +- fs/ocfs2/quota_global.c | 307 +++++++++++++++++++++++++++--------------------- fs/ocfs2/quota_local.c | 88 +++++++------- 3 files changed, 214 insertions(+), 186 deletions(-) diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index c7623430ca3c..903ffa933d53 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -104,10 +104,11 @@ static inline int ocfs2_global_release_dquot(struct dquot *dquot) int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); -int ocfs2_read_quota_block(struct inode *inode, u64 v_block, - struct buffer_head **bh); +int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh); int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, struct buffer_head **bh); +int ocfs2_create_local_dquot(struct dquot *dquot); +int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); extern const struct dquot_operations ocfs2_quota_operations; 
extern struct quota_format_type ocfs2_quota_format; diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index f391b11ea98c..1e3e0d5b3ae7 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -28,6 +28,41 @@ #include "buffer_head_io.h" #include "quota.h" +/* + * Locking of quotas with OCFS2 is rather complex. Here are rules that + * should be obeyed by all the functions: + * - any write of quota structure (either to local or global file) is protected + * by dqio_mutex or dquot->dq_lock. + * - any modification of global quota file holds inode cluster lock, i_mutex, + * and ip_alloc_sem of the global quota file (achieved by + * ocfs2_lock_global_qf). It also has to hold qinfo_lock. + * - an allocation of new blocks for local quota file is protected by + * its ip_alloc_sem + * + * A rough sketch of locking dependencies (lf = local file, gf = global file): + * Normal filesystem operation: + * start_trans -> dqio_mutex -> write to lf + * Syncing of local and global file: + * ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock -> + * write to gf + * -> write to lf + * Acquire dquot for the first time: + * dq_lock -> ocfs2_lock_global_qf -> qinfo_lock -> read from gf + * -> alloc space for gf + * -> start_trans -> qinfo_lock -> write to gf + * -> ip_alloc_sem of lf -> alloc space for lf + * -> write to lf + * Release last reference to dquot: + * dq_lock -> ocfs2_lock_global_qf -> start_trans -> qinfo_lock -> write to gf + * -> write to lf + * Note that all the above operations also hold the inode cluster lock of lf. 
+ * Recovery: + * inode cluster lock of recovered lf + * -> read bitmaps -> ip_alloc_sem of lf + * -> ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock -> + * write to gf + */ + static struct workqueue_struct *ocfs2_quota_wq = NULL; static void qsync_work_fn(struct work_struct *work); @@ -92,8 +127,7 @@ struct qtree_fmt_operations ocfs2_global_ops = { .is_id = ocfs2_global_is_id, }; -static int ocfs2_validate_quota_block(struct super_block *sb, - struct buffer_head *bh) +int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh) { struct ocfs2_disk_dqtrailer *dqt = ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); @@ -111,33 +145,6 @@ static int ocfs2_validate_quota_block(struct super_block *sb, return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); } -int ocfs2_read_quota_block(struct inode *inode, u64 v_block, - struct buffer_head **bh) -{ - int rc = 0; - struct buffer_head *tmp = *bh; - - if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { - ocfs2_error(inode->i_sb, - "Quota file %llu is probably corrupted! Requested " - "to read block %Lu but file has size only %Lu\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (unsigned long long)v_block, - (unsigned long long)i_size_read(inode)); - return -EIO; - } - rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, - ocfs2_validate_quota_block); - if (rc) - mlog_errno(rc); - - /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. 
*/ - if (!rc && !*bh) - *bh = tmp; - - return rc; -} - int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, struct buffer_head **bhp) { @@ -151,27 +158,6 @@ int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, return rc; } -static int ocfs2_get_quota_block(struct inode *inode, int block, - struct buffer_head **bh) -{ - u64 pblock, pcount; - int err; - - down_read(&OCFS2_I(inode)->ip_alloc_sem); - err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL); - up_read(&OCFS2_I(inode)->ip_alloc_sem); - if (err) { - mlog_errno(err); - return err; - } - *bh = sb_getblk(inode->i_sb, pblock); - if (!*bh) { - err = -EIO; - mlog_errno(err); - } - return err; -} - /* Read data from global quotafile - avoid pagecache and such because we cannot * afford acquiring the locks... We use quota cluster lock to serialize * operations. Caller is responsible for acquiring it. */ @@ -186,6 +172,7 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, int err = 0; struct buffer_head *bh; size_t toread, tocopy; + u64 pblock = 0, pcount = 0; if (off > i_size) return 0; @@ -194,8 +181,19 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, toread = len; while (toread > 0) { tocopy = min_t(size_t, (sb->s_blocksize - offset), toread); + if (!pcount) { + err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, + &pcount, NULL); + if (err) { + mlog_errno(err); + return err; + } + } else { + pcount--; + pblock++; + } bh = NULL; - err = ocfs2_read_quota_block(gqinode, blk, &bh); + err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh); if (err) { mlog_errno(err); return err; @@ -223,6 +221,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, int err = 0, new = 0, ja_type; struct buffer_head *bh = NULL; handle_t *handle = journal_current_handle(); + u64 pblock, pcount; if (!handle) { mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " @@ -235,12 +234,11 @@ ssize_t ocfs2_quota_write(struct 
super_block *sb, int type, len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; } - mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); if (gqinode->i_size < off + len) { loff_t rounded_end = ocfs2_align_bytes_to_blocks(sb, off + len); - /* Space is already allocated in ocfs2_global_read_dquot() */ + /* Space is already allocated in ocfs2_acquire_dquot() */ err = ocfs2_simple_size_update(gqinode, oinfo->dqi_gqi_bh, rounded_end); @@ -248,13 +246,20 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, goto out; new = 1; } + err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, &pcount, NULL); + if (err) { + mlog_errno(err); + goto out; + } /* Not rewriting whole block? */ if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && !new) { - err = ocfs2_read_quota_block(gqinode, blk, &bh); + err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh); ja_type = OCFS2_JOURNAL_ACCESS_WRITE; } else { - err = ocfs2_get_quota_block(gqinode, blk, &bh); + bh = sb_getblk(sb, pblock); + if (!bh) + err = -ENOMEM; ja_type = OCFS2_JOURNAL_ACCESS_CREATE; } if (err) { @@ -279,13 +284,11 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, brelse(bh); out: if (err) { - mutex_unlock(&gqinode->i_mutex); mlog_errno(err); return err; } gqinode->i_version++; ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); - mutex_unlock(&gqinode->i_mutex); return len; } @@ -303,11 +306,23 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) else WARN_ON(bh != oinfo->dqi_gqi_bh); spin_unlock(&dq_data_lock); + if (ex) { + mutex_lock(&oinfo->dqi_gqinode->i_mutex); + down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); + } else { + down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); + } return 0; } void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) { + if (ex) { + up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); + mutex_unlock(&oinfo->dqi_gqinode->i_mutex); + } else { + 
up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); + } ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); brelse(oinfo->dqi_gqi_bh); spin_lock(&dq_data_lock); @@ -458,75 +473,6 @@ static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) OCFS2_QUOTA_BLOCK_UPDATE_CREDITS; } -/* Read in information from global quota file and acquire a reference to it. - * dquot_acquire() has already started the transaction and locked quota file */ -int ocfs2_global_read_dquot(struct dquot *dquot) -{ - int err, err2, ex = 0; - struct super_block *sb = dquot->dq_sb; - int type = dquot->dq_type; - struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; - struct ocfs2_super *osb = OCFS2_SB(sb); - struct inode *gqinode = info->dqi_gqinode; - int need_alloc = ocfs2_global_qinit_alloc(sb, type); - handle_t *handle = NULL; - - err = ocfs2_qinfo_lock(info, 0); - if (err < 0) - goto out; - err = qtree_read_dquot(&info->dqi_gi, dquot); - if (err < 0) - goto out_qlock; - OCFS2_DQUOT(dquot)->dq_use_count++; - OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; - OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; - ocfs2_qinfo_unlock(info, 0); - - if (!dquot->dq_off) { /* No real quota entry? 
*/ - ex = 1; - /* - * Add blocks to quota file before we start a transaction since - * locking allocators ranks above a transaction start - */ - WARN_ON(journal_current_handle()); - down_write(&OCFS2_I(gqinode)->ip_alloc_sem); - err = ocfs2_extend_no_holes(gqinode, - gqinode->i_size + (need_alloc << sb->s_blocksize_bits), - gqinode->i_size); - up_write(&OCFS2_I(gqinode)->ip_alloc_sem); - if (err < 0) - goto out; - } - - handle = ocfs2_start_trans(osb, - ocfs2_calc_global_qinit_credits(sb, type)); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto out; - } - err = ocfs2_qinfo_lock(info, ex); - if (err < 0) - goto out_trans; - err = qtree_write_dquot(&info->dqi_gi, dquot); - if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { - err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); - if (!err) - err = err2; - } -out_qlock: - if (ex) - ocfs2_qinfo_unlock(info, 1); - else - ocfs2_qinfo_unlock(info, 0); -out_trans: - if (handle) - ocfs2_commit_trans(osb, handle); -out: - if (err < 0) - mlog_errno(err); - return err; -} - /* Sync local information about quota modifications with global quota file. * Caller must have started the transaction and obtained exclusive lock for * global quota file inode */ @@ -742,6 +688,10 @@ static int ocfs2_release_dquot(struct dquot *dquot) mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); + mutex_lock(&dquot->dq_lock); + /* Check whether we are not racing with some other dqget() */ + if (atomic_read(&dquot->dq_count) > 1) + goto out; status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) goto out; @@ -752,30 +702,113 @@ static int ocfs2_release_dquot(struct dquot *dquot) mlog_errno(status); goto out_ilock; } - status = dquot_release(dquot); + + status = ocfs2_global_release_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + status = ocfs2_local_release_dquot(handle, dquot); + /* + * If we fail here, we cannot do much as global structure is + * already released. 
So just complain... + */ + if (status < 0) + mlog_errno(status); + clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); +out_trans: ocfs2_commit_trans(osb, handle); out_ilock: ocfs2_unlock_global_qf(oinfo, 1); out: + mutex_unlock(&dquot->dq_lock); mlog_exit(status); return status; } +/* + * Read global dquot structure from disk or create it if it does + * not exist. Also update use count of the global structure and + * create structure in node-local quota file. + */ static int ocfs2_acquire_dquot(struct dquot *dquot) { - struct ocfs2_mem_dqinfo *oinfo = - sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; - int status = 0; + int status = 0, err; + int ex = 0; + struct super_block *sb = dquot->dq_sb; + struct ocfs2_super *osb = OCFS2_SB(sb); + int type = dquot->dq_type; + struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; + struct inode *gqinode = info->dqi_gqinode; + int need_alloc = ocfs2_global_qinit_alloc(sb, type); + handle_t *handle; - mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); - /* We need an exclusive lock, because we're going to update use count - * and instantiate possibly new dquot structure */ - status = ocfs2_lock_global_qf(oinfo, 1); + mlog_entry("id=%u, type=%d", dquot->dq_id, type); + mutex_lock(&dquot->dq_lock); + /* + * We need an exclusive lock, because we're going to update use count + * and instantiate possibly new dquot structure + */ + status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; - status = dquot_acquire(dquot); - ocfs2_unlock_global_qf(oinfo, 1); + if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { + status = ocfs2_qinfo_lock(info, 0); + if (status < 0) + goto out_dq; + status = qtree_read_dquot(&info->dqi_gi, dquot); + ocfs2_qinfo_unlock(info, 0); + if (status < 0) + goto out_dq; + } + set_bit(DQ_READ_B, &dquot->dq_flags); + + OCFS2_DQUOT(dquot)->dq_use_count++; + OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; + OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; + if 
(!dquot->dq_off) { /* No real quota entry? */ + ex = 1; + /* + * Add blocks to quota file before we start a transaction since + * locking allocators ranks above a transaction start + */ + WARN_ON(journal_current_handle()); + status = ocfs2_extend_no_holes(gqinode, + gqinode->i_size + (need_alloc << sb->s_blocksize_bits), + gqinode->i_size); + if (status < 0) + goto out_dq; + } + + handle = ocfs2_start_trans(osb, + ocfs2_calc_global_qinit_credits(sb, type)); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + goto out_dq; + } + status = ocfs2_qinfo_lock(info, ex); + if (status < 0) + goto out_trans; + status = qtree_write_dquot(&info->dqi_gi, dquot); + if (ex && info_dirty(sb_dqinfo(sb, type))) { + err = __ocfs2_global_write_info(sb, type); + if (!status) + status = err; + } + ocfs2_qinfo_unlock(info, ex); +out_trans: + ocfs2_commit_trans(osb, handle); +out_dq: + ocfs2_unlock_global_qf(info, 1); + if (status < 0) + goto out; + + status = ocfs2_create_local_dquot(dquot); + if (status < 0) + goto out; + set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out: + mutex_unlock(&dquot->dq_lock); mlog_exit(status); return status; } @@ -820,7 +853,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) mlog_errno(status); goto out_ilock; } + mutex_lock(&sb_dqopt(sb)->dqio_mutex); status = ocfs2_sync_dquot(dquot); + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); if (status < 0) { mlog_errno(status); goto out_trans; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 962e8380852b..778947f0e951 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -22,6 +22,7 @@ #include "dlmglue.h" #include "quota.h" #include "uptodate.h" +#include "super.h" /* Number of local quota structures per block */ static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) @@ -129,6 +130,39 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, return 0; } +/* + * Read quota block from a given logical offset. 
+ * + * This function acquires ip_alloc_sem and thus it must not be called with a + * transaction started. + */ +static int ocfs2_read_quota_block(struct inode *inode, u64 v_block, + struct buffer_head **bh) +{ + int rc = 0; + struct buffer_head *tmp = *bh; + + if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { + ocfs2_error(inode->i_sb, + "Quota file %llu is probably corrupted! Requested " + "to read block %Lu but file has size only %Lu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)v_block, + (unsigned long long)i_size_read(inode)); + return -EIO; + } + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, + ocfs2_validate_quota_block); + if (rc) + mlog_errno(rc); + + /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ + if (!rc && !*bh) + *bh = tmp; + + return rc; +} + /* Check whether we understand format of quota files */ static int ocfs2_local_check_quota_file(struct super_block *sb, int type) { @@ -972,10 +1006,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( } /* Initialize chunk header */ - down_read(&OCFS2_I(lqinode)->ip_alloc_sem); status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, &p_blkno, NULL, NULL); - up_read(&OCFS2_I(lqinode)->ip_alloc_sem); if (status < 0) { mlog_errno(status); goto out_trans; @@ -1003,10 +1035,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( ocfs2_journal_dirty(handle, bh); /* Initialize new block with structures */ - down_read(&OCFS2_I(lqinode)->ip_alloc_sem); status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1, &p_blkno, NULL, NULL); - up_read(&OCFS2_I(lqinode)->ip_alloc_sem); if (status < 0) { mlog_errno(status); goto out_trans; @@ -1103,10 +1133,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( } /* Get buffer from the just added block */ - down_read(&OCFS2_I(lqinode)->ip_alloc_sem); status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, &p_blkno, NULL, NULL); - 
up_read(&OCFS2_I(lqinode)->ip_alloc_sem); if (status < 0) { mlog_errno(status); goto out; @@ -1187,7 +1215,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private) } /* Create dquot in the local file for given id */ -static int ocfs2_create_local_dquot(struct dquot *dquot) +int ocfs2_create_local_dquot(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; int type = dquot->dq_type; @@ -1237,36 +1265,11 @@ out: return status; } -/* Create entry in local file for dquot, load data from the global file */ -static int ocfs2_local_read_dquot(struct dquot *dquot) -{ - int status; - - mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type); - - status = ocfs2_global_read_dquot(dquot); - if (status < 0) { - mlog_errno(status); - goto out_err; - } - - /* Now create entry in the local quota file */ - status = ocfs2_create_local_dquot(dquot); - if (status < 0) { - mlog_errno(status); - goto out_err; - } - mlog_exit(0); - return 0; -out_err: - mlog_exit(status); - return status; -} - -/* Release dquot structure from local quota file. ocfs2_release_dquot() has - * already started a transaction and obtained exclusive lock for global - * quota file. */ -static int ocfs2_local_release_dquot(struct dquot *dquot) +/* + * Release dquot structure from local quota file. 
ocfs2_release_dquot() has + * already started a transaction and written all changes to global quota file + */ +int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) { int status; int type = dquot->dq_type; @@ -1274,15 +1277,6 @@ static int ocfs2_local_release_dquot(struct dquot *dquot) struct super_block *sb = dquot->dq_sb; struct ocfs2_local_disk_chunk *dchunk; int offset; - handle_t *handle = journal_current_handle(); - - BUG_ON(!handle); - /* First write all local changes to global file */ - status = ocfs2_global_release_dquot(dquot); - if (status < 0) { - mlog_errno(status); - goto out; - } status = ocfs2_journal_access_dq(handle, INODE_CACHE(sb_dqopt(sb)->files[type]), @@ -1315,9 +1309,7 @@ static const struct quota_format_ops ocfs2_format_ops = { .read_file_info = ocfs2_local_read_info, .write_file_info = ocfs2_global_write_info, .free_file_info = ocfs2_local_free_info, - .read_dqblk = ocfs2_local_read_dquot, .commit_dqblk = ocfs2_local_write_dquot, - .release_dqblk = ocfs2_local_release_dquot, }; struct quota_format_type ocfs2_quota_format = { -- cgit v1.2.2 From 832d09cf1438bd172f69478bde74f20f05ec0115 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 11 May 2010 17:04:14 +0200 Subject: ocfs2: Fix estimate of credits needed for quota allocation We were missing reservation of a journal credit for modification of quota file inode when creating new dquot structure in the global quota file. 
Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/ocfs2/quota_global.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 1e3e0d5b3ae7..734995823740 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -468,9 +468,10 @@ static int ocfs2_global_qinit_alloc(struct super_block *sb, int type) static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) { - /* We modify all the allocated blocks, tree root, and info block */ + /* We modify all the allocated blocks, tree root, info block and + * the inode */ return (ocfs2_global_qinit_alloc(sb, type) + 2) * - OCFS2_QUOTA_BLOCK_UPDATE_CREDITS; + OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + 1; } /* Sync local information about quota modifications with global quota file. -- cgit v1.2.2 From 741e128933448e589a85286e535078b24f4cf568 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 May 2010 18:05:15 +0200 Subject: ocfs2: Fix NULL pointer deref when writing local dquot commit_dqblk() can write quota info to global file. That is actually a bad thing to do because if we are just modifying local quota file, we are not prepared (do not hold proper locks, do not have transaction credits) to do a modification of the global quota file. So do not use commit_dqblk() and instead call our writing function directly. 
Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/ocfs2/quota.h | 1 + fs/ocfs2/quota_global.c | 20 ++++++++++---------- fs/ocfs2/quota_local.c | 3 +-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 903ffa933d53..196fcb52d95d 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -109,6 +109,7 @@ int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, struct buffer_head **bh); int ocfs2_create_local_dquot(struct dquot *dquot); int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); +int ocfs2_local_write_dquot(struct dquot *dquot); extern const struct dquot_operations ocfs2_quota_operations; extern struct quota_format_type ocfs2_quota_format; diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 734995823740..2bb35fe00511 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -612,14 +612,13 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) } mutex_lock(&sb_dqopt(sb)->dqio_mutex); status = ocfs2_sync_dquot(dquot); - mutex_unlock(&sb_dqopt(sb)->dqio_mutex); if (status < 0) mlog_errno(status); /* We have to write local structure as well... 
*/ - dquot_mark_dquot_dirty(dquot); - status = dquot_commit(dquot); + status = ocfs2_local_write_dquot(dquot); if (status < 0) mlog_errno(status); + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); ocfs2_commit_trans(osb, handle); out_ilock: ocfs2_unlock_global_qf(oinfo, 1); @@ -658,7 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot) mlog_errno(status); goto out; } - status = dquot_commit(dquot); + mutex_lock(&sb_dqopt(dquot->dq_sb)->dqio_mutex); + status = ocfs2_local_write_dquot(dquot); + mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex); ocfs2_commit_trans(osb, handle); out: mlog_exit(status); @@ -831,7 +832,6 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) struct ocfs2_super *osb = OCFS2_SB(sb); mlog_entry("id=%u, type=%d", dquot->dq_id, type); - dquot_mark_dquot_dirty(dquot); /* In case user set some limits, sync dquot immediately to global * quota file so that information propagates quicker */ @@ -856,14 +856,14 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) } mutex_lock(&sb_dqopt(sb)->dqio_mutex); status = ocfs2_sync_dquot(dquot); - mutex_unlock(&sb_dqopt(sb)->dqio_mutex); if (status < 0) { mlog_errno(status); - goto out_trans; + goto out_dlock; } /* Now write updated local dquot structure */ - status = dquot_commit(dquot); -out_trans: + status = ocfs2_local_write_dquot(dquot); +out_dlock: + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); ocfs2_commit_trans(osb, handle); out_ilock: ocfs2_unlock_global_qf(oinfo, 1); @@ -915,7 +915,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) } const struct dquot_operations ocfs2_quota_operations = { - .write_dquot = ocfs2_write_dquot, + /* We never make dquot dirty so .write_dquot is never called */ .acquire_dquot = ocfs2_acquire_dquot, .release_dquot = ocfs2_release_dquot, .mark_dirty = ocfs2_mark_dquot_dirty, diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 778947f0e951..551a6bff9f2c 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -892,7 +892,7 @@ static 
void olq_set_dquot(struct buffer_head *bh, void *private) } /* Write dquot to local quota file */ -static int ocfs2_local_write_dquot(struct dquot *dquot) +int ocfs2_local_write_dquot(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); @@ -1309,7 +1309,6 @@ static const struct quota_format_ops ocfs2_format_ops = { .read_file_info = ocfs2_local_read_info, .write_file_info = ocfs2_global_write_info, .free_file_info = ocfs2_local_free_info, - .commit_dqblk = ocfs2_local_write_dquot, }; struct quota_format_type ocfs2_quota_format = { -- cgit v1.2.2 From 52a9ee281cfb26fffce1d6c409fb4b1f4aa8a766 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 May 2010 20:18:45 +0200 Subject: ocfs2: Use __dquot_transfer to avoid lock inversion dquot_transfer() acquires own references to dquots via dqget(). Thus it waits for dq_lock which creates a lock inversion because dq_lock ranks above transaction start but transaction is already started in ocfs2_setattr(). Fix the problem by passing own references directly to __dquot_transfer. 
Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/ocfs2/file.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e127c53ec2e7..97e54b9e654b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -933,9 +933,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *bh = NULL; handle_t *handle = NULL; - int qtype; - struct dquot *transfer_from[MAXQUOTAS] = { }; struct dquot *transfer_to[MAXQUOTAS] = { }; + int qtype; mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); @@ -1019,9 +1018,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, USRQUOTA); - transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, - USRQUOTA); - if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { + if (!transfer_to[USRQUOTA]) { status = -ESRCH; goto bail_unlock; } @@ -1031,9 +1028,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, GRPQUOTA); - transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, - GRPQUOTA); - if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { + if (!transfer_to[GRPQUOTA]) { status = -ESRCH; goto bail_unlock; } @@ -1045,7 +1040,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) mlog_errno(status); goto bail_unlock; } - status = dquot_transfer(inode, attr); + status = __dquot_transfer(inode, transfer_to); if (status < 0) goto bail_commit; } else { @@ -1085,10 +1080,8 @@ bail: brelse(bh); /* Release quota pointers in case we acquired them */ - for (qtype = 0; qtype < MAXQUOTAS; qtype++) { + for (qtype = 0; qtype < MAXQUOTAS; qtype++) dqput(transfer_to[qtype]); - dqput(transfer_from[qtype]); - } if (!status && attr->ia_valid & ATTR_MODE) { status = ocfs2_acl_chmod(inode); -- cgit v1.2.2 From 
c06bcbfa1ed8daaeb2a262f372b411207891e229 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 13 May 2010 22:14:53 +0200 Subject: ocfs2: Fix lock inversion in quotas during umount We cannot cancel delayed work from ocfs2_local_free_info because that is called with dqonoff_mutex held and the work it cancels requires dqonoff_mutex to finish. Cancel the work before acquiring dqonoff_mutex. Acked-by: Joel Becker Signed-off-by: Jan Kara --- fs/ocfs2/quota_local.c | 4 ---- fs/ocfs2/super.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 551a6bff9f2c..8bd70d4d184d 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -816,10 +816,6 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) int mark_clean = 1, len; int status; - /* At this point we know there are no more dquots and thus - * even if there's some sync in the pdflush queue, it won't - * find any dquots and return without doing anything */ - cancel_delayed_work_sync(&oinfo->dqi_sync_work); iput(oinfo->dqi_gqinode); ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); ocfs2_lock_res_free(&oinfo->dqi_gqlock); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1c2c39f6f0b6..2c26ce251cb3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -938,12 +938,16 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) int type; struct inode *inode; struct super_block *sb = osb->sb; + struct ocfs2_mem_dqinfo *oinfo; /* We mostly ignore errors in this function because there's not much * we can do when we see them */ for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_loaded(sb, type)) continue; + /* Cancel periodic syncing before we grab dqonoff_mutex */ + oinfo = sb_dqinfo(sb, type)->dqi_priv; + cancel_delayed_work_sync(&oinfo->dqi_sync_work); inode = igrab(sb->s_dquot.files[type]); /* Turn off quotas. 
This will remove all dquot structures from * memory and so they will be automatically synced to global -- cgit v1.2.2 From 1907131bbeabb33db313bad34f3ec1a5faedbd64 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Mon, 17 May 2010 18:36:03 +0200 Subject: dquot: Detect partial write error to quota file in write_blk() and add printk_ratelimit for quota error messages This patch changes quota_tree.c:write_blk() to detect error caused by partial write to quota file and add a macro to limit control printed quota error messages so we won't fill up dmesg with a corrupted quota file. Signed-off-by: Jiaying Zhang Signed-off-by: Jan Kara --- fs/quota/quota_tree.c | 46 +++++++++++++++++++++++++++------------------- fs/quota/quota_tree.h | 6 ++++++ fs/quota/quota_v2.c | 6 +++--- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index 5b7f7416ec7a..24f03407eeb5 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -60,9 +60,17 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) { struct super_block *sb = info->dqi_sb; + ssize_t ret; - return sb->s_op->quota_write(sb, info->dqi_type, buf, + ret = sb->s_op->quota_write(sb, info->dqi_type, buf, info->dqi_usable_bs, blk << info->dqi_blocksize_bits); + if (ret != info->dqi_usable_bs) { + q_warn(KERN_WARNING "VFS: dquota write failed on " + "dev %s\n", sb->s_id); + if (ret >= 0) + ret = -EIO; + } + return ret; } /* Remove empty block from list and return it */ @@ -152,7 +160,7 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf, dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); /* No matter whether write succeeds block is out of list */ if (write_blk(info, blk, buf) < 0) - printk(KERN_ERR + q_warn(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk); return 0; @@ -244,7 +252,7 @@ static uint find_free_dqentry(struct 
qtree_mem_dqinfo *info, if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { *err = remove_free_dqentry(info, buf, blk); if (*err < 0) { - printk(KERN_ERR "VFS: find_free_dqentry(): Can't " + q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't " "remove block (%u) from entry free list.\n", blk); goto out_buf; @@ -268,7 +276,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, #endif *err = write_blk(info, blk, buf); if (*err < 0) { - printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " + q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " "data block %u.\n", blk); goto out_buf; } @@ -303,7 +311,7 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, } else { ret = read_blk(info, *treeblk, buf); if (ret < 0) { - printk(KERN_ERR "VFS: Can't read tree quota block " + q_warn(KERN_ERR "VFS: Can't read tree quota block " "%u.\n", *treeblk); goto out_buf; } @@ -365,7 +373,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) if (!dquot->dq_off) { ret = dq_insert_tree(info, dquot); if (ret < 0) { - printk(KERN_ERR "VFS: Error %zd occurred while " + q_warn(KERN_ERR "VFS: Error %zd occurred while " "creating quota.\n", ret); kfree(ddquot); return ret; @@ -377,7 +385,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, dquot->dq_off); if (ret != info->dqi_entry_size) { - printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", + q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n", sb->s_id); if (ret >= 0) ret = -ENOSPC; @@ -402,14 +410,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, if (!buf) return -ENOMEM; if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { - printk(KERN_ERR "VFS: Quota structure has offset to other " + q_warn(KERN_ERR "VFS: Quota structure has offset to other " "block (%u) than it should (%u).\n", blk, 
(uint)(dquot->dq_off >> info->dqi_blocksize_bits)); goto out_buf; } ret = read_blk(info, blk, buf); if (ret < 0) { - printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); + q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk); goto out_buf; } dh = (struct qt_disk_dqdbheader *)buf; @@ -419,7 +427,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, if (ret >= 0) ret = put_free_dqblk(info, buf, blk); if (ret < 0) { - printk(KERN_ERR "VFS: Can't move quota data block (%u) " + q_warn(KERN_ERR "VFS: Can't move quota data block (%u) " "to free list.\n", blk); goto out_buf; } @@ -432,14 +440,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, /* Insert will write block itself */ ret = insert_free_dqentry(info, buf, blk); if (ret < 0) { - printk(KERN_ERR "VFS: Can't insert quota data " + q_warn(KERN_ERR "VFS: Can't insert quota data " "block (%u) to free entry list.\n", blk); goto out_buf; } } else { ret = write_blk(info, blk, buf); if (ret < 0) { - printk(KERN_ERR "VFS: Can't write quota data " + q_warn(KERN_ERR "VFS: Can't write quota data " "block %u\n", blk); goto out_buf; } @@ -464,7 +472,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, return -ENOMEM; ret = read_blk(info, *blk, buf); if (ret < 0) { - printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); + q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); goto out_buf; } newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); @@ -488,7 +496,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, } else { ret = write_blk(info, *blk, buf); if (ret < 0) - printk(KERN_ERR "VFS: Can't write quota tree " + q_warn(KERN_ERR "VFS: Can't write quota tree " "block %u.\n", *blk); } } @@ -521,7 +529,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, return -ENOMEM; ret = read_blk(info, blk, buf); if (ret < 0) { - printk(KERN_ERR "VFS: Can't read quota 
tree block %u.\n", blk); + q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); goto out_buf; } ddquot = buf + sizeof(struct qt_disk_dqdbheader); @@ -531,7 +539,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, ddquot += info->dqi_entry_size; } if (i == qtree_dqstr_in_blk(info)) { - printk(KERN_ERR "VFS: Quota for id %u referenced " + q_warn(KERN_ERR "VFS: Quota for id %u referenced " "but not present.\n", dquot->dq_id); ret = -EIO; goto out_buf; @@ -556,7 +564,7 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, return -ENOMEM; ret = read_blk(info, blk, buf); if (ret < 0) { - printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); goto out_buf; } ret = 0; @@ -599,7 +607,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) offset = find_dqentry(info, dquot); if (offset <= 0) { /* Entry not present? */ if (offset < 0) - printk(KERN_ERR "VFS: Can't read quota " + q_warn(KERN_ERR "VFS: Can't read quota " "structure for id %u.\n", dquot->dq_id); dquot->dq_off = 0; set_bit(DQ_FAKE_B, &dquot->dq_flags); @@ -617,7 +625,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) if (ret != info->dqi_entry_size) { if (ret >= 0) ret = -EIO; - printk(KERN_ERR "VFS: Error while reading quota " + q_warn(KERN_ERR "VFS: Error while reading quota " "structure for id %u.\n", dquot->dq_id); set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h index a1ab8db81a51..ccc3e71fb1d8 100644 --- a/fs/quota/quota_tree.h +++ b/fs/quota/quota_tree.h @@ -22,4 +22,10 @@ struct qt_disk_dqdbheader { #define QT_TREEOFF 1 /* Offset of tree in file in blocks */ +#define q_warn(fmt, args...) 
\ +do { \ + if (printk_ratelimit()) \ + printk(fmt, ## args); \ +} while(0) + #endif /* _LINUX_QUOTAIO_TREE_H */ diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index e3da02f4986f..135206af1458 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -63,7 +63,7 @@ static int v2_read_header(struct super_block *sb, int type, size = sb->s_op->quota_read(sb, type, (char *)dqhead, sizeof(struct v2_disk_dqheader), 0); if (size != sizeof(struct v2_disk_dqheader)) { - printk(KERN_WARNING "quota_v2: Failed header read:" + q_warn(KERN_WARNING "quota_v2: Failed header read:" " expected=%zd got=%zd\n", sizeof(struct v2_disk_dqheader), size); return 0; @@ -106,7 +106,7 @@ static int v2_read_file_info(struct super_block *sb, int type) size = sb->s_op->quota_read(sb, type, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { - printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", + q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", sb->s_id); return -1; } @@ -167,7 +167,7 @@ static int v2_write_file_info(struct super_block *sb, int type) size = sb->s_op->quota_write(sb, type, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { - printk(KERN_WARNING "Can't write info structure on device %s.\n", + q_warn(KERN_WARNING "Can't write info structure on device %s.\n", sb->s_id); return -1; } -- cgit v1.2.2