diff options
author | Yongqiang Yang <xiaoqiangnk@gmail.com> | 2011-07-26 21:35:44 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-07-26 21:35:44 -0400 |
commit | 8f82f840ec6ab873f520364d443ff6fa1b3f8e22 (patch) | |
tree | 72d90cdc8f7311ef913b7471e53f718bebbf7414 /fs/ext4 | |
parent | 2d859db3e4a82a365572592d57624a5f996ed0ec (diff) |
ext4: prevent parallel resizers by atomic bit ops
Before this patch, parallel resizers were allowed and protected by a
mutex lock. Actually, there is no need to support parallel resizers, so
this patch prevents parallel resizers by atomic bit ops, like
lock_page() and unlock_page() do.
To do this, the patch removes the mutex lock s_resize_lock from struct
ext4_sb_info and adds an unsigned long field named s_resize_flags,
which indicates whether there is a resizer.
Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 7 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 12 | ||||
-rw-r--r-- | fs/ext4/resize.c | 55 | ||||
-rw-r--r-- | fs/ext4/super.c | 2 |
4 files changed, 36 insertions, 40 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 62cee2b6fe79..bb0f7760c7c8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1127,7 +1127,8 @@ struct ext4_sb_info { | |||
1127 | struct journal_s *s_journal; | 1127 | struct journal_s *s_journal; |
1128 | struct list_head s_orphan; | 1128 | struct list_head s_orphan; |
1129 | struct mutex s_orphan_lock; | 1129 | struct mutex s_orphan_lock; |
1130 | struct mutex s_resize_lock; | 1130 | unsigned long s_resize_flags; /* Flags indicating if there |
1131 | is a resizer */ | ||
1131 | unsigned long s_commit_interval; | 1132 | unsigned long s_commit_interval; |
1132 | u32 s_max_batch_time; | 1133 | u32 s_max_batch_time; |
1133 | u32 s_min_batch_time; | 1134 | u32 s_min_batch_time; |
@@ -2269,6 +2270,10 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
2269 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | 2270 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; |
2270 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | 2271 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; |
2271 | 2272 | ||
2273 | #define EXT4_RESIZING 0 | ||
2274 | extern int ext4_resize_begin(struct super_block *sb); | ||
2275 | extern void ext4_resize_end(struct super_block *sb); | ||
2276 | |||
2272 | #endif /* __KERNEL__ */ | 2277 | #endif /* __KERNEL__ */ |
2273 | 2278 | ||
2274 | #endif /* _EXT4_H */ | 2279 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 808c554e773f..f18bfe37aff8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -202,8 +202,9 @@ setversion_out: | |||
202 | struct super_block *sb = inode->i_sb; | 202 | struct super_block *sb = inode->i_sb; |
203 | int err, err2=0; | 203 | int err, err2=0; |
204 | 204 | ||
205 | if (!capable(CAP_SYS_RESOURCE)) | 205 | err = ext4_resize_begin(sb); |
206 | return -EPERM; | 206 | if (err) |
207 | return err; | ||
207 | 208 | ||
208 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | 209 | if (get_user(n_blocks_count, (__u32 __user *)arg)) |
209 | return -EFAULT; | 210 | return -EFAULT; |
@@ -221,6 +222,7 @@ setversion_out: | |||
221 | if (err == 0) | 222 | if (err == 0) |
222 | err = err2; | 223 | err = err2; |
223 | mnt_drop_write(filp->f_path.mnt); | 224 | mnt_drop_write(filp->f_path.mnt); |
225 | ext4_resize_end(sb); | ||
224 | 226 | ||
225 | return err; | 227 | return err; |
226 | } | 228 | } |
@@ -271,8 +273,9 @@ mext_out: | |||
271 | struct super_block *sb = inode->i_sb; | 273 | struct super_block *sb = inode->i_sb; |
272 | int err, err2=0; | 274 | int err, err2=0; |
273 | 275 | ||
274 | if (!capable(CAP_SYS_RESOURCE)) | 276 | err = ext4_resize_begin(sb); |
275 | return -EPERM; | 277 | if (err) |
278 | return err; | ||
276 | 279 | ||
277 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, | 280 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, |
278 | sizeof(input))) | 281 | sizeof(input))) |
@@ -291,6 +294,7 @@ mext_out: | |||
291 | if (err == 0) | 294 | if (err == 0) |
292 | err = err2; | 295 | err = err2; |
293 | mnt_drop_write(filp->f_path.mnt); | 296 | mnt_drop_write(filp->f_path.mnt); |
297 | ext4_resize_end(sb); | ||
294 | 298 | ||
295 | return err; | 299 | return err; |
296 | } | 300 | } |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 80bbc9c60c24..0213f631271f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -16,6 +16,25 @@ | |||
16 | 16 | ||
17 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
18 | 18 | ||
19 | int ext4_resize_begin(struct super_block *sb) | ||
20 | { | ||
21 | int ret = 0; | ||
22 | |||
23 | if (!capable(CAP_SYS_RESOURCE)) | ||
24 | return -EPERM; | ||
25 | |||
26 | if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) | ||
27 | ret = -EBUSY; | ||
28 | |||
29 | return ret; | ||
30 | } | ||
31 | |||
32 | void ext4_resize_end(struct super_block *sb) | ||
33 | { | ||
34 | clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); | ||
35 | smp_mb__after_clear_bit(); | ||
36 | } | ||
37 | |||
19 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) | 38 | #define outside(b, first, last) ((b) < (first) || (b) >= (last)) |
20 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) | 39 | #define inside(b, first, last) ((b) >= (first) && (b) < (last)) |
21 | 40 | ||
@@ -181,11 +200,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
181 | if (IS_ERR(handle)) | 200 | if (IS_ERR(handle)) |
182 | return PTR_ERR(handle); | 201 | return PTR_ERR(handle); |
183 | 202 | ||
184 | mutex_lock(&sbi->s_resize_lock); | 203 | BUG_ON(input->group != sbi->s_groups_count); |
185 | if (input->group != sbi->s_groups_count) { | ||
186 | err = -EBUSY; | ||
187 | goto exit_journal; | ||
188 | } | ||
189 | 204 | ||
190 | if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { | 205 | if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { |
191 | err = PTR_ERR(bh); | 206 | err = PTR_ERR(bh); |
@@ -285,7 +300,6 @@ exit_bh: | |||
285 | brelse(bh); | 300 | brelse(bh); |
286 | 301 | ||
287 | exit_journal: | 302 | exit_journal: |
288 | mutex_unlock(&sbi->s_resize_lock); | ||
289 | if ((err2 = ext4_journal_stop(handle)) && !err) | 303 | if ((err2 = ext4_journal_stop(handle)) && !err) |
290 | err = err2; | 304 | err = err2; |
291 | 305 | ||
@@ -799,13 +813,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
799 | goto exit_put; | 813 | goto exit_put; |
800 | } | 814 | } |
801 | 815 | ||
802 | mutex_lock(&sbi->s_resize_lock); | ||
803 | if (input->group != sbi->s_groups_count) { | ||
804 | ext4_warning(sb, "multiple resizers run on filesystem!"); | ||
805 | err = -EBUSY; | ||
806 | goto exit_journal; | ||
807 | } | ||
808 | |||
809 | if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) | 816 | if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) |
810 | goto exit_journal; | 817 | goto exit_journal; |
811 | 818 | ||
@@ -829,7 +836,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
829 | /* | 836 | /* |
830 | * OK, now we've set up the new group. Time to make it active. | 837 | * OK, now we've set up the new group. Time to make it active. |
831 | * | 838 | * |
832 | * We do not lock all allocations via s_resize_lock | ||
833 | * so we have to be safe wrt. concurrent accesses the group | 839 | * so we have to be safe wrt. concurrent accesses the group |
834 | * data. So we need to be careful to set all of the relevant | 840 | * data. So we need to be careful to set all of the relevant |
835 | * group descriptor data etc. *before* we enable the group. | 841 | * group descriptor data etc. *before* we enable the group. |
@@ -886,13 +892,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
886 | * | 892 | * |
887 | * The precise rules we use are: | 893 | * The precise rules we use are: |
888 | * | 894 | * |
889 | * * Writers of s_groups_count *must* hold s_resize_lock | ||
890 | * AND | ||
891 | * * Writers must perform a smp_wmb() after updating all dependent | 895 | * * Writers must perform a smp_wmb() after updating all dependent |
892 | * data and before modifying the groups count | 896 | * data and before modifying the groups count |
893 | * | 897 | * |
894 | * * Readers must hold s_resize_lock over the access | ||
895 | * OR | ||
896 | * * Readers must perform an smp_rmb() after reading the groups count | 898 | * * Readers must perform an smp_rmb() after reading the groups count |
897 | * and before reading any dependent data. | 899 | * and before reading any dependent data. |
898 | * | 900 | * |
@@ -937,7 +939,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
937 | ext4_handle_dirty_super(handle, sb); | 939 | ext4_handle_dirty_super(handle, sb); |
938 | 940 | ||
939 | exit_journal: | 941 | exit_journal: |
940 | mutex_unlock(&sbi->s_resize_lock); | ||
941 | if ((err2 = ext4_journal_stop(handle)) && !err) | 942 | if ((err2 = ext4_journal_stop(handle)) && !err) |
942 | err = err2; | 943 | err = err2; |
943 | if (!err) { | 944 | if (!err) { |
@@ -972,9 +973,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
972 | int err; | 973 | int err; |
973 | ext4_group_t group; | 974 | ext4_group_t group; |
974 | 975 | ||
975 | /* We don't need to worry about locking wrt other resizers just | ||
976 | * yet: we're going to revalidate es->s_blocks_count after | ||
977 | * taking the s_resize_lock below. */ | ||
978 | o_blocks_count = ext4_blocks_count(es); | 976 | o_blocks_count = ext4_blocks_count(es); |
979 | 977 | ||
980 | if (test_opt(sb, DEBUG)) | 978 | if (test_opt(sb, DEBUG)) |
@@ -995,7 +993,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
995 | 993 | ||
996 | if (n_blocks_count < o_blocks_count) { | 994 | if (n_blocks_count < o_blocks_count) { |
997 | ext4_warning(sb, "can't shrink FS - resize aborted"); | 995 | ext4_warning(sb, "can't shrink FS - resize aborted"); |
998 | return -EBUSY; | 996 | return -EINVAL; |
999 | } | 997 | } |
1000 | 998 | ||
1001 | /* Handle the remaining blocks in the last group only. */ | 999 | /* Handle the remaining blocks in the last group only. */ |
@@ -1038,24 +1036,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1038 | goto exit_put; | 1036 | goto exit_put; |
1039 | } | 1037 | } |
1040 | 1038 | ||
1041 | mutex_lock(&EXT4_SB(sb)->s_resize_lock); | ||
1042 | if (o_blocks_count != ext4_blocks_count(es)) { | ||
1043 | ext4_warning(sb, "multiple resizers run on filesystem!"); | ||
1044 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | ||
1045 | ext4_journal_stop(handle); | ||
1046 | err = -EBUSY; | ||
1047 | goto exit_put; | ||
1048 | } | ||
1049 | |||
1050 | if ((err = ext4_journal_get_write_access(handle, | 1039 | if ((err = ext4_journal_get_write_access(handle, |
1051 | EXT4_SB(sb)->s_sbh))) { | 1040 | EXT4_SB(sb)->s_sbh))) { |
1052 | ext4_warning(sb, "error %d on journal write access", err); | 1041 | ext4_warning(sb, "error %d on journal write access", err); |
1053 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | ||
1054 | ext4_journal_stop(handle); | 1042 | ext4_journal_stop(handle); |
1055 | goto exit_put; | 1043 | goto exit_put; |
1056 | } | 1044 | } |
1057 | ext4_blocks_count_set(es, o_blocks_count + add); | 1045 | ext4_blocks_count_set(es, o_blocks_count + add); |
1058 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | ||
1059 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1046 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1060 | o_blocks_count + add); | 1047 | o_blocks_count + add); |
1061 | /* We add the blocks to the bitmap and set the group need init bit */ | 1048 | /* We add the blocks to the bitmap and set the group need init bit */ |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 143d763729b4..cfe9f39c4ba2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -3500,7 +3500,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3500 | 3500 | ||
3501 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 3501 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
3502 | mutex_init(&sbi->s_orphan_lock); | 3502 | mutex_init(&sbi->s_orphan_lock); |
3503 | mutex_init(&sbi->s_resize_lock); | 3503 | sbi->s_resize_flags = 0; |
3504 | 3504 | ||
3505 | sb->s_root = NULL; | 3505 | sb->s_root = NULL; |
3506 | 3506 | ||