diff options
author | Adrian Hunter <ext-adrian.hunter@nokia.com> | 2008-08-22 07:23:35 -0400 |
---|---|---|
committer | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2008-08-25 07:34:02 -0400 |
commit | 601c0bc46753007be011b513ba4fc50ed8e30aef (patch) | |
tree | 39017065a8418b60362686a7771afe138e100c08 /fs/ubifs | |
parent | 761e29f3bb19b05bea55285dfdf2d28e001a63b8 (diff) |
UBIFS: allow for racing between GC and TNC
The TNC mutex is unlocked prematurely when reading leaf nodes
with non-hashed keys. This is unsafe because the node may be
moved by garbage collection and the eraseblock unmapped, although
that has never actually happened during stress testing.
This patch fixes the flaw by detecting the race and retrying with
the TNC mutex locked.
Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Diffstat (limited to 'fs/ubifs')
-rw-r--r-- | fs/ubifs/gc.c | 6 | ||||
-rw-r--r-- | fs/ubifs/misc.h | 17 | ||||
-rw-r--r-- | fs/ubifs/tnc.c | 109 | ||||
-rw-r--r-- | fs/ubifs/ubifs.h | 6 |
4 files changed, 87 insertions, 51 deletions
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..13f1019c859f 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
344 | if (err) | 344 | if (err) |
345 | goto out; | 345 | goto out; |
346 | 346 | ||
347 | /* Allow for races with TNC */ | ||
348 | c->gced_lnum = lnum; | ||
349 | smp_wmb(); | ||
350 | c->gc_seq += 1; | ||
351 | smp_wmb(); | ||
352 | |||
347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
349 | err = LEB_RETAINED; | 355 | err = LEB_RETAINED; |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..87ced4c74a61 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -325,4 +325,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
326 | } | 326 | } |
327 | 327 | ||
328 | /** | ||
329 | * ubifs_tnc_lookup - look up a file-system node. | ||
330 | * @c: UBIFS file-system description object | ||
331 | * @key: node key to lookup | ||
332 | * @node: the node is returned here | ||
333 | * | ||
334 | * This function look up and reads node with key @key. The caller has to make | ||
335 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
336 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
337 | * case of failure. | ||
338 | */ | ||
339 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
340 | const union ubifs_key *key, void *node) | ||
341 | { | ||
342 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
343 | } | ||
344 | |||
328 | #endif /* __UBIFS_MISC_H__ */ | 345 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 4fbc5921688f..7da209ab9378 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
507 | ret = 0; | 507 | ret = 0; |
508 | } | 508 | } |
509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
512 | return ret; | 512 | return ret; |
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /** | 1384 | /** |
1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
1389 | * | 1389 | * |
1390 | * This function look up and reads node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
1393 | * case of failure. | ||
1394 | */ | 1393 | */ |
1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
1396 | void *node) | ||
1397 | { | 1395 | { |
1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
1399 | struct ubifs_znode *znode; | ||
1400 | struct ubifs_zbranch zbr, *zt; | ||
1401 | |||
1402 | mutex_lock(&c->tnc_mutex); | ||
1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | ||
1404 | if (!found) { | ||
1405 | err = -ENOENT; | ||
1406 | goto out; | ||
1407 | } else if (found < 0) { | ||
1408 | err = found; | ||
1409 | goto out; | ||
1410 | } | ||
1411 | zt = &znode->zbranch[n]; | ||
1412 | if (is_hash_key(c, key)) { | ||
1413 | /* | ||
1414 | * In this case the leaf node cache gets used, so we pass the | ||
1415 | * address of the zbranch and keep the mutex locked | ||
1416 | */ | ||
1417 | err = tnc_read_node_nm(c, zt, node); | ||
1418 | goto out; | ||
1419 | } | ||
1420 | zbr = znode->zbranch[n]; | ||
1421 | mutex_unlock(&c->tnc_mutex); | ||
1422 | |||
1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1424 | return err; | ||
1425 | 1397 | ||
1426 | out: | 1398 | gced_lnum = c->gced_lnum; |
1427 | mutex_unlock(&c->tnc_mutex); | 1399 | smp_rmb(); |
1428 | return err; | 1400 | gc_seq2 = c->gc_seq; |
1401 | /* Same seq means no GC */ | ||
1402 | if (gc_seq1 == gc_seq2) | ||
1403 | return 0; | ||
1404 | /* Different by more than 1 means we don't know */ | ||
1405 | if (gc_seq1 + 1 != gc_seq2) | ||
1406 | return 1; | ||
1407 | /* | ||
1408 | * We have seen the sequence number has increased by 1. Now we need to | ||
1409 | * be sure we read the right LEB number, so read it again. | ||
1410 | */ | ||
1411 | smp_rmb(); | ||
1412 | if (gced_lnum != c->gced_lnum) | ||
1413 | return 1; | ||
1414 | /* Finally we can check lnum */ | ||
1415 | if (gced_lnum == lnum) | ||
1416 | return 1; | ||
1417 | return 0; | ||
1429 | } | 1418 | } |
1430 | 1419 | ||
1431 | /** | 1420 | /** |
@@ -1436,16 +1425,19 @@ out: | |||
1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
1438 | * | 1427 | * |
1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function look up and reads node with key @key. The caller has to make |
1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
1441 | */ | 1432 | */ |
1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
1444 | { | 1435 | { |
1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
1448 | 1439 | ||
1440 | again: | ||
1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1451 | if (!found) { | 1443 | if (!found) { |
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
1456 | goto out; | 1448 | goto out; |
1457 | } | 1449 | } |
1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
1451 | if (lnum) { | ||
1452 | *lnum = zt->lnum; | ||
1453 | *offs = zt->offs; | ||
1454 | } | ||
1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
1460 | /* | 1456 | /* |
1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
1463 | */ | 1459 | */ |
1464 | *lnum = zt->lnum; | ||
1465 | *offs = zt->offs; | ||
1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
1467 | goto out; | 1461 | goto out; |
1468 | } | 1462 | } |
1463 | if (safely) { | ||
1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
1469 | gc_seq1 = c->gc_seq; | ||
1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
1471 | 1471 | ||
1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1475 | return err; | ||
1476 | } | ||
1474 | 1477 | ||
1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
1476 | return err; | 1479 | if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
1480 | /* | ||
1481 | * The node may have been GC'ed out from under us so try again | ||
1482 | * while keeping the TNC mutex locked. | ||
1483 | */ | ||
1484 | safely = 1; | ||
1485 | goto again; | ||
1486 | } | ||
1487 | return 0; | ||
1477 | 1488 | ||
1478 | out: | 1489 | out: |
1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..7828d69ca4f8 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -1028,6 +1028,8 @@ struct ubifs_mount_opts { | |||
1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
1029 | * @idx_gc: list of index LEBs that have been garbage collected | 1029 | * @idx_gc: list of index LEBs that have been garbage collected |
1030 | * @idx_gc_cnt: number of elements on the idx_gc list | 1030 | * @idx_gc_cnt: number of elements on the idx_gc list |
1031 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
1032 | * @gced_lnum: last non-index LEB that was garbage collected | ||
1031 | * | 1033 | * |
1032 | * @infos_list: links all 'ubifs_info' objects | 1034 | * @infos_list: links all 'ubifs_info' objects |
1033 | * @umount_mutex: serializes shrinker and un-mount | 1035 | * @umount_mutex: serializes shrinker and un-mount |
@@ -1257,6 +1259,8 @@ struct ubifs_info { | |||
1257 | void *sbuf; | 1259 | void *sbuf; |
1258 | struct list_head idx_gc; | 1260 | struct list_head idx_gc; |
1259 | int idx_gc_cnt; | 1261 | int idx_gc_cnt; |
1262 | volatile int gc_seq; | ||
1263 | volatile int gced_lnum; | ||
1260 | 1264 | ||
1261 | struct list_head infos_list; | 1265 | struct list_head infos_list; |
1262 | struct mutex umount_mutex; | 1266 | struct mutex umount_mutex; |
@@ -1451,8 +1455,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
1451 | /* tnc.c */ | 1455 | /* tnc.c */ |
1452 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1456 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
1453 | struct ubifs_znode **zn, int *n); | 1457 | struct ubifs_znode **zn, int *n); |
1454 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
1455 | void *node); | ||
1456 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1458 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
1457 | void *node, const struct qstr *nm); | 1459 | void *node, const struct qstr *nm); |
1458 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1460 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |