aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdrian Hunter <ext-adrian.hunter@nokia.com>2008-08-22 07:23:35 -0400
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2008-08-25 07:34:02 -0400
commit601c0bc46753007be011b513ba4fc50ed8e30aef (patch)
tree39017065a8418b60362686a7771afe138e100c08
parent761e29f3bb19b05bea55285dfdf2d28e001a63b8 (diff)
UBIFS: allow for racing between GC and TNC
The TNC mutex is unlocked prematurely when reading leaf nodes with non-hashed keys. This is unsafe because the node may be moved by garbage collection and the eraseblock unmapped, although that has never actually happened during stress testing. This patch fixes the flaw by detecting the race and retrying with the TNC mutex locked. Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
-rw-r--r--fs/ubifs/gc.c6
-rw-r--r--fs/ubifs/misc.h17
-rw-r--r--fs/ubifs/tnc.c109
-rw-r--r--fs/ubifs/ubifs.h6
4 files changed, 87 insertions, 51 deletions
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac29081..13f1019c859f 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
344 if (err) 344 if (err)
345 goto out; 345 goto out;
346 346
347 /* Allow for races with TNC */
348 c->gced_lnum = lnum;
349 smp_wmb();
350 c->gc_seq += 1;
351 smp_wmb();
352
347 if (c->gc_lnum == -1) { 353 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum; 354 c->gc_lnum = lnum;
349 err = LEB_RETAINED; 355 err = LEB_RETAINED;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 87dabf9fe742..87ced4c74a61 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -325,4 +325,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
325 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 325 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
326} 326}
327 327
328/**
329 * ubifs_tnc_lookup - look up a file-system node.
330 * @c: UBIFS file-system description object
331 * @key: node key to lookup
332 * @node: the node is returned here
333 *
334 * This function look up and reads node with key @key. The caller has to make
335 * sure the @node buffer is large enough to fit the node. Returns zero in case
336 * of success, %-ENOENT if the node was not found, and a negative error code in
337 * case of failure.
338 */
339static inline int ubifs_tnc_lookup(struct ubifs_info *c,
340 const union ubifs_key *key, void *node)
341{
342 return ubifs_tnc_locate(c, key, node, NULL, NULL);
343}
344
328#endif /* __UBIFS_MISC_H__ */ 345#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 4fbc5921688f..7da209ab9378 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
506 if (keys_cmp(c, key, &node_key) != 0) 506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0; 507 ret = 0;
508 } 508 }
509 if (ret == 0) 509 if (ret == 0 && c->replaying)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s", 510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); 511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret; 512 return ret;
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1382} 1382}
1383 1383
1384/** 1384/**
1385 * ubifs_tnc_lookup - look up a file-system node. 1385 * maybe_leb_gced - determine if a LEB may have been garbage collected.
1386 * @c: UBIFS file-system description object 1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup 1387 * @lnum: LEB number
1388 * @node: the node is returned here 1388 * @gc_seq1: garbage collection sequence number
1389 * 1389 *
1390 * This function look up and reads node with key @key. The caller has to make 1390 * This function determines if @lnum may have been garbage collected since
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case 1391 * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
1392 * of success, %-ENOENT if the node was not found, and a negative error code in 1392 * %0 is returned.
1393 * case of failure.
1394 */ 1393 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, 1394static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1396 void *node)
1397{ 1395{
1398 int found, n, err; 1396 int gc_seq2, gced_lnum;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401
1402 mutex_lock(&c->tnc_mutex);
1403 found = ubifs_lookup_level0(c, key, &znode, &n);
1404 if (!found) {
1405 err = -ENOENT;
1406 goto out;
1407 } else if (found < 0) {
1408 err = found;
1409 goto out;
1410 }
1411 zt = &znode->zbranch[n];
1412 if (is_hash_key(c, key)) {
1413 /*
1414 * In this case the leaf node cache gets used, so we pass the
1415 * address of the zbranch and keep the mutex locked
1416 */
1417 err = tnc_read_node_nm(c, zt, node);
1418 goto out;
1419 }
1420 zbr = znode->zbranch[n];
1421 mutex_unlock(&c->tnc_mutex);
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425 1397
1426out: 1398 gced_lnum = c->gced_lnum;
1427 mutex_unlock(&c->tnc_mutex); 1399 smp_rmb();
1428 return err; 1400 gc_seq2 = c->gc_seq;
1401 /* Same seq means no GC */
1402 if (gc_seq1 == gc_seq2)
1403 return 0;
1404 /* Different by more than 1 means we don't know */
1405 if (gc_seq1 + 1 != gc_seq2)
1406 return 1;
1407 /*
1408 * We have seen the sequence number has increased by 1. Now we need to
1409 * be sure we read the right LEB number, so read it again.
1410 */
1411 smp_rmb();
1412 if (gced_lnum != c->gced_lnum)
1413 return 1;
1414 /* Finally we can check lnum */
1415 if (gced_lnum == lnum)
1416 return 1;
1417 return 0;
1429} 1418}
1430 1419
1431/** 1420/**
@@ -1436,16 +1425,19 @@ out:
1436 * @lnum: LEB number is returned here 1425 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here 1426 * @offs: offset is returned here
1438 * 1427 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node 1428 * This function look up and reads node with key @key. The caller has to make
1440 * location also. See 'ubifs_tnc_lookup()'. 1429 * sure the @node buffer is large enough to fit the node. Returns zero in case
1430 * of success, %-ENOENT if the node was not found, and a negative error code in
1431 * case of failure. The node location can be returned in @lnum and @offs.
1441 */ 1432 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1433int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs) 1434 void *node, int *lnum, int *offs)
1444{ 1435{
1445 int found, n, err; 1436 int found, n, err, safely = 0, gc_seq1;
1446 struct ubifs_znode *znode; 1437 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt; 1438 struct ubifs_zbranch zbr, *zt;
1448 1439
1440again:
1449 mutex_lock(&c->tnc_mutex); 1441 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n); 1442 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) { 1443 if (!found) {
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1456 goto out; 1448 goto out;
1457 } 1449 }
1458 zt = &znode->zbranch[n]; 1450 zt = &znode->zbranch[n];
1451 if (lnum) {
1452 *lnum = zt->lnum;
1453 *offs = zt->offs;
1454 }
1459 if (is_hash_key(c, key)) { 1455 if (is_hash_key(c, key)) {
1460 /* 1456 /*
1461 * In this case the leaf node cache gets used, so we pass the 1457 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked 1458 * address of the zbranch and keep the mutex locked
1463 */ 1459 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node); 1460 err = tnc_read_node_nm(c, zt, node);
1467 goto out; 1461 goto out;
1468 } 1462 }
1463 if (safely) {
1464 err = ubifs_tnc_read_node(c, zt, node);
1465 goto out;
1466 }
1467 /* Drop the TNC mutex prematurely and race with garbage collection */
1469 zbr = znode->zbranch[n]; 1468 zbr = znode->zbranch[n];
1469 gc_seq1 = c->gc_seq;
1470 mutex_unlock(&c->tnc_mutex); 1470 mutex_unlock(&c->tnc_mutex);
1471 1471
1472 *lnum = zbr.lnum; 1472 if (ubifs_get_wbuf(c, zbr.lnum)) {
1473 *offs = zbr.offs; 1473 /* We do not GC journal heads */
1474 err = ubifs_tnc_read_node(c, &zbr, node);
1475 return err;
1476 }
1474 1477
1475 err = ubifs_tnc_read_node(c, &zbr, node); 1478 err = fallible_read_node(c, key, &zbr, node);
1476 return err; 1479 if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
1480 /*
1481 * The node may have been GC'ed out from under us so try again
1482 * while keeping the TNC mutex locked.
1483 */
1484 safely = 1;
1485 goto again;
1486 }
1487 return 0;
1477 1488
1478out: 1489out:
1479 mutex_unlock(&c->tnc_mutex); 1490 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d7f706f7a302..7828d69ca4f8 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1028,6 +1028,8 @@ struct ubifs_mount_opts {
1028 * @sbuf: a buffer of LEB size used by GC and replay for scanning 1028 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1029 * @idx_gc: list of index LEBs that have been garbage collected 1029 * @idx_gc: list of index LEBs that have been garbage collected
1030 * @idx_gc_cnt: number of elements on the idx_gc list 1030 * @idx_gc_cnt: number of elements on the idx_gc list
1031 * @gc_seq: incremented for every non-index LEB garbage collected
1032 * @gced_lnum: last non-index LEB that was garbage collected
1031 * 1033 *
1032 * @infos_list: links all 'ubifs_info' objects 1034 * @infos_list: links all 'ubifs_info' objects
1033 * @umount_mutex: serializes shrinker and un-mount 1035 * @umount_mutex: serializes shrinker and un-mount
@@ -1257,6 +1259,8 @@ struct ubifs_info {
1257 void *sbuf; 1259 void *sbuf;
1258 struct list_head idx_gc; 1260 struct list_head idx_gc;
1259 int idx_gc_cnt; 1261 int idx_gc_cnt;
1262 volatile int gc_seq;
1263 volatile int gced_lnum;
1260 1264
1261 struct list_head infos_list; 1265 struct list_head infos_list;
1262 struct mutex umount_mutex; 1266 struct mutex umount_mutex;
@@ -1451,8 +1455,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1451/* tnc.c */ 1455/* tnc.c */
1452int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, 1456int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1453 struct ubifs_znode **zn, int *n); 1457 struct ubifs_znode **zn, int *n);
1454int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1455 void *node);
1456int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, 1458int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 void *node, const struct qstr *nm); 1459 void *node, const struct qstr *nm);
1458int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1460int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,