diff options
author | Eric Ren <zren@suse.com> | 2017-02-22 18:40:44 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-22 19:41:27 -0500 |
commit | b891fa5024a95c77e0d6fd6655cb74af6fb77f46 (patch) | |
tree | e2d441eb65792f70e2f156f771c4e33949a8731e /fs/ocfs2/file.c | |
parent | 439a36b8ef38657f765b80b775e2885338d72451 (diff) |
ocfs2: fix deadlock issue when taking inode lock at vfs entry points
Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html
The reason why taking cluster inode lock at vfs entry points opens up a
self deadlock window, is explained in the previous patch of this series.
So far, we have seen two different code paths that have this issue.
1. do_sys_open
may_open
inode_permission
ocfs2_permission
ocfs2_inode_lock() <=== take PR
generic_permission
get_acl
ocfs2_iop_get_acl
ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
chmod_common
notify_change
ocfs2_setattr <=== take EX
posix_acl_chmod
get_acl
ocfs2_iop_get_acl <=== take PR
ocfs2_iop_set_acl <=== take EX
Fixes them by adding the tracking logic (in the previous patch) for these
funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().
Link: http://lkml.kernel.org/r/20170117100948.11657-3-zren@suse.com
Signed-off-by: Eric Ren <zren@suse.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r-- | fs/ocfs2/file.c | 58 |
1 files changed, 45 insertions, 13 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c4889655d32b..7b6a146327d7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1138,6 +1138,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1138 | handle_t *handle = NULL; | 1138 | handle_t *handle = NULL; |
1139 | struct dquot *transfer_to[MAXQUOTAS] = { }; | 1139 | struct dquot *transfer_to[MAXQUOTAS] = { }; |
1140 | int qtype; | 1140 | int qtype; |
1141 | int had_lock; | ||
1142 | struct ocfs2_lock_holder oh; | ||
1141 | 1143 | ||
1142 | trace_ocfs2_setattr(inode, dentry, | 1144 | trace_ocfs2_setattr(inode, dentry, |
1143 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1145 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
@@ -1173,11 +1175,30 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1173 | } | 1175 | } |
1174 | } | 1176 | } |
1175 | 1177 | ||
1176 | status = ocfs2_inode_lock(inode, &bh, 1); | 1178 | had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh); |
1177 | if (status < 0) { | 1179 | if (had_lock < 0) { |
1178 | if (status != -ENOENT) | 1180 | status = had_lock; |
1179 | mlog_errno(status); | ||
1180 | goto bail_unlock_rw; | 1181 | goto bail_unlock_rw; |
1182 | } else if (had_lock) { | ||
1183 | /* | ||
1184 | * As far as we know, ocfs2_setattr() could only be the first | ||
1185 | * VFS entry point in the call chain of recursive cluster | ||
1186 | * locking issue. | ||
1187 | * | ||
1188 | * For instance: | ||
1189 | * chmod_common() | ||
1190 | * notify_change() | ||
1191 | * ocfs2_setattr() | ||
1192 | * posix_acl_chmod() | ||
1193 | * ocfs2_iop_get_acl() | ||
1194 | * | ||
1195 | * But, we're not 100% sure if it's always true, because the | ||
1196 | * ordering of the VFS entry points in the call chain is out | ||
1197 | * of our control. So, we'd better dump the stack here to | ||
1198 | * catch the other cases of recursive locking. | ||
1199 | */ | ||
1200 | mlog(ML_ERROR, "Another case of recursive locking:\n"); | ||
1201 | dump_stack(); | ||
1181 | } | 1202 | } |
1182 | inode_locked = 1; | 1203 | inode_locked = 1; |
1183 | 1204 | ||
@@ -1260,8 +1281,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1260 | bail_commit: | 1281 | bail_commit: |
1261 | ocfs2_commit_trans(osb, handle); | 1282 | ocfs2_commit_trans(osb, handle); |
1262 | bail_unlock: | 1283 | bail_unlock: |
1263 | if (status) { | 1284 | if (status && inode_locked) { |
1264 | ocfs2_inode_unlock(inode, 1); | 1285 | ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); |
1265 | inode_locked = 0; | 1286 | inode_locked = 0; |
1266 | } | 1287 | } |
1267 | bail_unlock_rw: | 1288 | bail_unlock_rw: |
@@ -1279,7 +1300,7 @@ bail: | |||
1279 | mlog_errno(status); | 1300 | mlog_errno(status); |
1280 | } | 1301 | } |
1281 | if (inode_locked) | 1302 | if (inode_locked) |
1282 | ocfs2_inode_unlock(inode, 1); | 1303 | ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); |
1283 | 1304 | ||
1284 | brelse(bh); | 1305 | brelse(bh); |
1285 | return status; | 1306 | return status; |
@@ -1320,21 +1341,32 @@ bail: | |||
1320 | 1341 | ||
1321 | int ocfs2_permission(struct inode *inode, int mask) | 1342 | int ocfs2_permission(struct inode *inode, int mask) |
1322 | { | 1343 | { |
1323 | int ret; | 1344 | int ret, had_lock; |
1345 | struct ocfs2_lock_holder oh; | ||
1324 | 1346 | ||
1325 | if (mask & MAY_NOT_BLOCK) | 1347 | if (mask & MAY_NOT_BLOCK) |
1326 | return -ECHILD; | 1348 | return -ECHILD; |
1327 | 1349 | ||
1328 | ret = ocfs2_inode_lock(inode, NULL, 0); | 1350 | had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh); |
1329 | if (ret) { | 1351 | if (had_lock < 0) { |
1330 | if (ret != -ENOENT) | 1352 | ret = had_lock; |
1331 | mlog_errno(ret); | ||
1332 | goto out; | 1353 | goto out; |
1354 | } else if (had_lock) { | ||
1355 | /* See comments in ocfs2_setattr() for details. | ||
1356 | * The call chain of this case could be: | ||
1357 | * do_sys_open() | ||
1358 | * may_open() | ||
1359 | * inode_permission() | ||
1360 | * ocfs2_permission() | ||
1361 | * ocfs2_iop_get_acl() | ||
1362 | */ | ||
1363 | mlog(ML_ERROR, "Another case of recursive locking:\n"); | ||
1364 | dump_stack(); | ||
1333 | } | 1365 | } |
1334 | 1366 | ||
1335 | ret = generic_permission(inode, mask); | 1367 | ret = generic_permission(inode, mask); |
1336 | 1368 | ||
1337 | ocfs2_inode_unlock(inode, 0); | 1369 | ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); |
1338 | out: | 1370 | out: |
1339 | return ret; | 1371 | return ret; |
1340 | } | 1372 | } |