diff options
author | Oleg Nesterov <oleg@redhat.com> | 2015-07-19 18:50:55 -0400 |
---|---|---|
committer | Oleg Nesterov <oleg@redhat.com> | 2015-08-15 07:52:09 -0400 |
commit | f4b554af9931585174d4913b482eacab75858964 (patch) | |
tree | e01977e85b9568688a28058ebacd82f34be6ae14 | |
parent | bee9182d955227f01ff3b80c4cb6acca9bb40b11 (diff) |
fix the broken lockdep logic in __sb_start_write()
1. wait_event(frozen < level) without rwsem_acquire_read() is just
wrong from lockdep perspective. If we are going to deadlock
because the caller is buggy, lockdep can't detect this problem.
2. __sb_start_write() can race with thaw_super() + freeze_super(),
and after "goto retry" the 2nd acquire_freeze_lock() is wrong.
3. The "tell lockdep we are doing trylock" hack doesn't look nice.
I think this is correct, but this logic should be more explicit.
Yes, the recursive read_lock() is fine if we hold the lock on a
higher level. But we do not need to fool lockdep. If we can not
deadlock in this case then try-lock must not fail and we can use
use wait == F throughout this code.
Note: as Dave Chinner explains, the "trylock" hack and the fat comment
can be probably removed. But this needs a separate change and it will
be trivial: just kill __sb_start_write() and rename do_sb_start_write()
back to __sb_start_write().
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jan Kara <jack@suse.com>
-rw-r--r-- | fs/super.c | 73 |
1 files changed, 40 insertions, 33 deletions
diff --git a/fs/super.c b/fs/super.c index b61372354f2b..24a76bcd62a5 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -1158,38 +1158,11 @@ void __sb_end_write(struct super_block *sb, int level) | |||
1158 | } | 1158 | } |
1159 | EXPORT_SYMBOL(__sb_end_write); | 1159 | EXPORT_SYMBOL(__sb_end_write); |
1160 | 1160 | ||
1161 | #ifdef CONFIG_LOCKDEP | 1161 | static int do_sb_start_write(struct super_block *sb, int level, bool wait, |
1162 | /* | ||
1163 | * We want lockdep to tell us about possible deadlocks with freezing but | ||
1164 | * it's it bit tricky to properly instrument it. Getting a freeze protection | ||
1165 | * works as getting a read lock but there are subtle problems. XFS for example | ||
1166 | * gets freeze protection on internal level twice in some cases, which is OK | ||
1167 | * only because we already hold a freeze protection also on higher level. Due | ||
1168 | * to these cases we have to tell lockdep we are doing trylock when we | ||
1169 | * already hold a freeze protection for a higher freeze level. | ||
1170 | */ | ||
1171 | static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock, | ||
1172 | unsigned long ip) | 1162 | unsigned long ip) |
1173 | { | 1163 | { |
1174 | int i; | 1164 | if (wait) |
1175 | 1165 | rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 0, ip); | |
1176 | if (!trylock) { | ||
1177 | for (i = 0; i < level - 1; i++) | ||
1178 | if (lock_is_held(&sb->s_writers.lock_map[i])) { | ||
1179 | trylock = true; | ||
1180 | break; | ||
1181 | } | ||
1182 | } | ||
1183 | rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip); | ||
1184 | } | ||
1185 | #endif | ||
1186 | |||
1187 | /* | ||
1188 | * This is an internal function, please use sb_start_{write,pagefault,intwrite} | ||
1189 | * instead. | ||
1190 | */ | ||
1191 | int __sb_start_write(struct super_block *sb, int level, bool wait) | ||
1192 | { | ||
1193 | retry: | 1166 | retry: |
1194 | if (unlikely(sb->s_writers.frozen >= level)) { | 1167 | if (unlikely(sb->s_writers.frozen >= level)) { |
1195 | if (!wait) | 1168 | if (!wait) |
@@ -1198,9 +1171,6 @@ retry: | |||
1198 | sb->s_writers.frozen < level); | 1171 | sb->s_writers.frozen < level); |
1199 | } | 1172 | } |
1200 | 1173 | ||
1201 | #ifdef CONFIG_LOCKDEP | ||
1202 | acquire_freeze_lock(sb, level, !wait, _RET_IP_); | ||
1203 | #endif | ||
1204 | percpu_counter_inc(&sb->s_writers.counter[level-1]); | 1174 | percpu_counter_inc(&sb->s_writers.counter[level-1]); |
1205 | /* | 1175 | /* |
1206 | * Make sure counter is updated before we check for frozen. | 1176 | * Make sure counter is updated before we check for frozen. |
@@ -1211,8 +1181,45 @@ retry: | |||
1211 | __sb_end_write(sb, level); | 1181 | __sb_end_write(sb, level); |
1212 | goto retry; | 1182 | goto retry; |
1213 | } | 1183 | } |
1184 | |||
1185 | if (!wait) | ||
1186 | rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 1, ip); | ||
1214 | return 1; | 1187 | return 1; |
1215 | } | 1188 | } |
1189 | |||
1190 | /* | ||
1191 | * This is an internal function, please use sb_start_{write,pagefault,intwrite} | ||
1192 | * instead. | ||
1193 | */ | ||
1194 | int __sb_start_write(struct super_block *sb, int level, bool wait) | ||
1195 | { | ||
1196 | bool force_trylock = false; | ||
1197 | int ret; | ||
1198 | |||
1199 | #ifdef CONFIG_LOCKDEP | ||
1200 | /* | ||
1201 | * We want lockdep to tell us about possible deadlocks with freezing | ||
1202 | * but it's it bit tricky to properly instrument it. Getting a freeze | ||
1203 | * protection works as getting a read lock but there are subtle | ||
1204 | * problems. XFS for example gets freeze protection on internal level | ||
1205 | * twice in some cases, which is OK only because we already hold a | ||
1206 | * freeze protection also on higher level. Due to these cases we have | ||
1207 | * to use wait == F (trylock mode) which must not fail. | ||
1208 | */ | ||
1209 | if (wait) { | ||
1210 | int i; | ||
1211 | |||
1212 | for (i = 0; i < level - 1; i++) | ||
1213 | if (lock_is_held(&sb->s_writers.lock_map[i])) { | ||
1214 | force_trylock = true; | ||
1215 | break; | ||
1216 | } | ||
1217 | } | ||
1218 | #endif | ||
1219 | ret = do_sb_start_write(sb, level, wait && !force_trylock, _RET_IP_); | ||
1220 | WARN_ON(force_trylock & !ret); | ||
1221 | return ret; | ||
1222 | } | ||
1216 | EXPORT_SYMBOL(__sb_start_write); | 1223 | EXPORT_SYMBOL(__sb_start_write); |
1217 | 1224 | ||
1218 | /** | 1225 | /** |