aboutsummaryrefslogtreecommitdiffstats
path: root/fs/super.c
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2015-07-19 18:50:55 -0400
committerOleg Nesterov <oleg@redhat.com>2015-08-15 07:52:09 -0400
commitf4b554af9931585174d4913b482eacab75858964 (patch)
treee01977e85b9568688a28058ebacd82f34be6ae14 /fs/super.c
parentbee9182d955227f01ff3b80c4cb6acca9bb40b11 (diff)
fix the broken lockdep logic in __sb_start_write()
1. wait_event(frozen < level) without rwsem_acquire_read() is just wrong from lockdep perspective. If we are going to deadlock because the caller is buggy, lockdep can't detect this problem. 2. __sb_start_write() can race with thaw_super() + freeze_super(), and after "goto retry" the 2nd acquire_freeze_lock() is wrong. 3. The "tell lockdep we are doing trylock" hack doesn't look nice. I think this is correct, but this logic should be more explicit. Yes, the recursive read_lock() is fine if we hold the lock on a higher level. But we do not need to fool lockdep. If we can not deadlock in this case then try-lock must not fail and we can use use wait == F throughout this code. Note: as Dave Chinner explains, the "trylock" hack and the fat comment can be probably removed. But this needs a separate change and it will be trivial: just kill __sb_start_write() and rename do_sb_start_write() back to __sb_start_write(). Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Jan Kara <jack@suse.com>
Diffstat (limited to 'fs/super.c')
-rw-r--r--fs/super.c73
1 files changed, 40 insertions, 33 deletions
diff --git a/fs/super.c b/fs/super.c
index b61372354f2b..24a76bcd62a5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1158,38 +1158,11 @@ void __sb_end_write(struct super_block *sb, int level)
1158} 1158}
1159EXPORT_SYMBOL(__sb_end_write); 1159EXPORT_SYMBOL(__sb_end_write);
1160 1160
1161#ifdef CONFIG_LOCKDEP 1161static int do_sb_start_write(struct super_block *sb, int level, bool wait,
1162/*
1163 * We want lockdep to tell us about possible deadlocks with freezing but
1164 * it's it bit tricky to properly instrument it. Getting a freeze protection
1165 * works as getting a read lock but there are subtle problems. XFS for example
1166 * gets freeze protection on internal level twice in some cases, which is OK
1167 * only because we already hold a freeze protection also on higher level. Due
1168 * to these cases we have to tell lockdep we are doing trylock when we
1169 * already hold a freeze protection for a higher freeze level.
1170 */
1171static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
1172 unsigned long ip) 1162 unsigned long ip)
1173{ 1163{
1174 int i; 1164 if (wait)
1175 1165 rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 0, ip);
1176 if (!trylock) {
1177 for (i = 0; i < level - 1; i++)
1178 if (lock_is_held(&sb->s_writers.lock_map[i])) {
1179 trylock = true;
1180 break;
1181 }
1182 }
1183 rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
1184}
1185#endif
1186
1187/*
1188 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
1189 * instead.
1190 */
1191int __sb_start_write(struct super_block *sb, int level, bool wait)
1192{
1193retry: 1166retry:
1194 if (unlikely(sb->s_writers.frozen >= level)) { 1167 if (unlikely(sb->s_writers.frozen >= level)) {
1195 if (!wait) 1168 if (!wait)
@@ -1198,9 +1171,6 @@ retry:
1198 sb->s_writers.frozen < level); 1171 sb->s_writers.frozen < level);
1199 } 1172 }
1200 1173
1201#ifdef CONFIG_LOCKDEP
1202 acquire_freeze_lock(sb, level, !wait, _RET_IP_);
1203#endif
1204 percpu_counter_inc(&sb->s_writers.counter[level-1]); 1174 percpu_counter_inc(&sb->s_writers.counter[level-1]);
1205 /* 1175 /*
1206 * Make sure counter is updated before we check for frozen. 1176 * Make sure counter is updated before we check for frozen.
@@ -1211,8 +1181,45 @@ retry:
1211 __sb_end_write(sb, level); 1181 __sb_end_write(sb, level);
1212 goto retry; 1182 goto retry;
1213 } 1183 }
1184
1185 if (!wait)
1186 rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 1, ip);
1214 return 1; 1187 return 1;
1215} 1188}
1189
1190/*
1191 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
1192 * instead.
1193 */
1194int __sb_start_write(struct super_block *sb, int level, bool wait)
1195{
1196 bool force_trylock = false;
1197 int ret;
1198
1199#ifdef CONFIG_LOCKDEP
1200 /*
1201 * We want lockdep to tell us about possible deadlocks with freezing
1202 * but it's it bit tricky to properly instrument it. Getting a freeze
1203 * protection works as getting a read lock but there are subtle
1204 * problems. XFS for example gets freeze protection on internal level
1205 * twice in some cases, which is OK only because we already hold a
1206 * freeze protection also on higher level. Due to these cases we have
1207 * to use wait == F (trylock mode) which must not fail.
1208 */
1209 if (wait) {
1210 int i;
1211
1212 for (i = 0; i < level - 1; i++)
1213 if (lock_is_held(&sb->s_writers.lock_map[i])) {
1214 force_trylock = true;
1215 break;
1216 }
1217 }
1218#endif
1219 ret = do_sb_start_write(sb, level, wait && !force_trylock, _RET_IP_);
1220 WARN_ON(force_trylock & !ret);
1221 return ret;
1222}
1216EXPORT_SYMBOL(__sb_start_write); 1223EXPORT_SYMBOL(__sb_start_write);
1217 1224
1218/** 1225/**